From 6db8e86c6b878d7df37377775c9ef8e290b3b4ae Mon Sep 17 00:00:00 2001
From: Wolfgang Sourdeau <wolfgang.sourdeau@iperceptions.com>
Date: Fri, 18 Aug 2017 15:20:45 -0400
Subject: [PATCH] updated liblz4 to version 1.8
 (https://github.com/lz4/lz4/releases/tag/v1.8.0)

---
 lz4/README            |    1 +
 lz4/lz4.c             | 1769 +++++++++++++++++++++++++++--------------
 lz4/lz4.h             |  570 ++++++++-----
 lz4/lz4.mk            |    5 +-
 lz4/lz4cli.c          | 1266 -----------------------------
 lz4/lz4cli/COPYING    |  339 ++++++++
 lz4/lz4cli/README.md  |   71 ++
 lz4/lz4cli/bench.c    |  521 ++++++++++++
 lz4/lz4cli/bench.h    |   37 +
 lz4/lz4cli/datagen.c  |  189 +++++
 lz4/lz4cli/datagen.h  |   40 +
 lz4/lz4cli/lz4.1      |  220 +++++
 lz4/lz4cli/lz4.1.md   |  218 +++++
 lz4/lz4cli/lz4cli.c   |  647 +++++++++++++++
 lz4/lz4cli/lz4cli.mk  |    3 +
 lz4/lz4cli/lz4io.c    | 1045 ++++++++++++++++++++++++
 lz4/lz4cli/lz4io.h    |  101 +++
 lz4/lz4cli/platform.h |  154 ++++
 lz4/lz4cli/util.h     |  494 ++++++++++++
 lz4/lz4frame.c        | 1669 ++++++++++++++++++++++++++++++++++++++
 lz4/lz4frame.h        |  391 +++++++++
 lz4/lz4frame_static.h |  143 ++++
 lz4/lz4hc.c           | 1223 ++++++++++++++--------------
 lz4/lz4hc.h           |  311 +++++---
 lz4/lz4opt.h          |  366 +++++++++
 lz4/xxhash.c          |  971 +++++++++++++++-------
 lz4/xxhash.h          |  279 +++++--
 27 files changed, 9937 insertions(+), 3106 deletions(-)
 create mode 100644 lz4/README
 delete mode 100644 lz4/lz4cli.c
 create mode 100644 lz4/lz4cli/COPYING
 create mode 100644 lz4/lz4cli/README.md
 create mode 100644 lz4/lz4cli/bench.c
 create mode 100644 lz4/lz4cli/bench.h
 create mode 100644 lz4/lz4cli/datagen.c
 create mode 100644 lz4/lz4cli/datagen.h
 create mode 100644 lz4/lz4cli/lz4.1
 create mode 100644 lz4/lz4cli/lz4.1.md
 create mode 100644 lz4/lz4cli/lz4cli.c
 create mode 100644 lz4/lz4cli/lz4cli.mk
 create mode 100644 lz4/lz4cli/lz4io.c
 create mode 100644 lz4/lz4cli/lz4io.h
 create mode 100644 lz4/lz4cli/platform.h
 create mode 100644 lz4/lz4cli/util.h
 create mode 100644 lz4/lz4frame.c
 create mode 100644 lz4/lz4frame.h
 create mode 100644 lz4/lz4frame_static.h
 create mode 100644 lz4/lz4opt.h

diff --git a/lz4/README b/lz4/README
new file mode 100644
index 0000000..e2902d3
--- /dev/null
+++ b/lz4/README
@@ -0,0 +1 @@
+This version of liblz4 uses the files from https://github.com/lz4/lz4/releases/tag/v1.8.0.
diff --git a/lz4/lz4.c b/lz4/lz4.c
index f521b0f..41c0a28 100644
--- a/lz4/lz4.c
+++ b/lz4/lz4.c
@@ -1,6 +1,7 @@
 /*
    LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2013, Yann Collet.
+   Copyright (C) 2011-2017, Yann Collet.
+
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -27,113 +28,96 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : http://code.google.com/p/lz4/
-   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
 */
 
-//**************************************
-// Tuning parameters
-//**************************************
-// MEMORY_USAGE :
-// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-// Increasing memory usage improves compression ratio
-// Reduced memory usage can improve speed, due to cache effect
-// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
-#define MEMORY_USAGE 14
-
-// HEAPMODE :
-// Select how default compression functions will allocate memory for their hash table,
-// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
-#define HEAPMODE 0
-
-
-//**************************************
-// CPU Feature Detection
-//**************************************
-// 32 or 64 bits ?
-#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
-  || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
-  || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
-  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   // Detects 64 bits mode
-#  define LZ4_ARCH64 1
-#else
-#  define LZ4_ARCH64 0
-#endif
 
-// Little Endian or Big Endian ?
-// Overwrite the #define below if you know your architecture endianess
-#if defined (__GLIBC__)
-#  include <endian.h>
-#  if (__BYTE_ORDER == __BIG_ENDIAN)
-#     define LZ4_BIG_ENDIAN 1
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define LZ4_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
-   || defined(__hpux)  || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define LZ4_BIG_ENDIAN 1
-#else
-// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
+/*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+#  define LZ4_HEAPMODE 0
 #endif
 
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
-#if defined(__ARM_FEATURE_UNALIGNED)
-#  define LZ4_FORCE_UNALIGNED_ACCESS 1
+/*
+ * ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define ACCELERATION_DEFAULT 1
+
+
+/*-************************************
+*  CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets whose assembly generation depends on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define LZ4_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+#    define LZ4_FORCE_MEMORY_ACCESS 1
+#  endif
 #endif
 
-// Define this parameter if your target system or compiler does not support hardware bit count
-#if defined(_MSC_VER) && defined(_WIN32_WCE)            // Visual Studio for Windows CE does not support Hardware bit count
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif
 
-// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
-// This option may provide a small boost to performance for some big endian cpu, although probably modest.
-// You may set this option to 1 if data will remain within closed environment.
-// This option is useless on Little_Endian CPU (such as x86)
-//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
 
+/*-************************************
+*  Dependency
+**************************************/
+#include "lz4.h"
+/* see also "memory routines" below */
 
-//**************************************
-// Compiler Options
-//**************************************
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   // C99
-/* "restrict" is a known keyword */
-#else
-#  define restrict // Disable restrict
-#endif
-
-#ifdef _MSC_VER    // Visual Studio
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    // For Visual 2005
-#  if LZ4_ARCH64   // 64-bits
-#    pragma intrinsic(_BitScanForward64) // For Visual 2005
-#    pragma intrinsic(_BitScanReverse64) // For Visual 2005
-#  else            // 32-bits
-#    pragma intrinsic(_BitScanForward)   // For Visual 2005
-#    pragma intrinsic(_BitScanReverse)   // For Visual 2005
-#  endif
-#  pragma warning(disable : 4127)        // disable: C4127: conditional expression is constant
-#else
-#  ifdef __GNUC__
-#    define FORCE_INLINE static inline __attribute__((always_inline))
-#  else
-#    define FORCE_INLINE static inline
-#  endif
-#endif
-
-#ifdef _MSC_VER
-#  define lz4_bswap16(x) _byteswap_ushort(x)
-#else
-#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
-#endif
 
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+/*-************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
+#endif  /* _MSC_VER */
 
-#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+#ifndef FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define FORCE_INLINE static __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define FORCE_INLINE static inline
+#      endif
+#    else
+#      define FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* FORCE_INLINE */
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
 #else
 #  define expect(expr,value)    (expr)
@@ -143,89 +127,155 @@
 #define unlikely(expr)   expect((expr) != 0, 0)
 
 
-//**************************************
-// Memory routines
-//**************************************
-#include <stdlib.h>   // malloc, calloc, free
+/*-************************************
+*  Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
 #define ALLOCATOR(n,s) calloc(n,s)
 #define FREEMEM        free
-#include <string.h>   // memset, memcpy
+#include <string.h>   /* memset, memcpy */
 #define MEM_INIT       memset
 
 
-//**************************************
-// Includes
-//**************************************
-#include "lz4.h"
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
+/*-************************************
+*  Basic Types
+**************************************/
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef uint32_t U32;
   typedef  int32_t S32;
   typedef uint64_t U64;
+  typedef uintptr_t uptrval;
 #else
   typedef unsigned char       BYTE;
   typedef unsigned short      U16;
   typedef unsigned int        U32;
   typedef   signed int        S32;
   typedef unsigned long long  U64;
+  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
 #endif
 
-#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
-#  define _PACKED __attribute__ ((packed))
+#if defined(__x86_64__)
+  typedef U64    reg_t;   /* 64-bits in x32 mode */
 #else
-#  define _PACKED
+  typedef size_t reg_t;   /* 32-bits in x32 mode */
 #endif
 
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#    pragma pack(1)
-#  else
-#    pragma pack(push, 1)
-#  endif
-#endif
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+static unsigned LZ4_isLittleEndian(void)   /* runtime endianness probe : returns 1 on little-endian hosts, 0 on big-endian ones */
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];   /* the byte at the lowest address holds the low-order byte of `u` only on little-endian hosts */
+}
 
-typedef struct { U16 v; }  _PACKED U16_S;
-typedef struct { U32 v; }  _PACKED U32_S;
-typedef struct { U64 v; }  _PACKED U64_S;
-typedef struct {size_t v;} _PACKED size_t_S;
 
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#    pragma pack(0)
-#  else
-#    pragma pack(pop)
-#  endif
-#endif
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }   /* native-order 16-bit load through a plain pointer cast : no alignment handling */
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }   /* same technique, 32-bit load */
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }   /* same technique, one reg_t-sized load */
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }   /* native-order 16-bit store through a plain pointer cast */
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }   /* same technique, 32-bit store */
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;   /* packed : every member may be accessed at any address, the compiler emits whatever access sequence that requires */
+
+static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }   /* native-order 16-bit load through the packed union */
+static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }   /* same, 32-bit load */
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }   /* same, one reg_t-sized load */
+
+static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }   /* native-order 16-bit store through the packed union */
+static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }   /* same, 32-bit store */
+
+#else  /* safe and portable access through memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)   /* native-order 16-bit load ; going through memcpy() makes it valid at any alignment */
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
 
-#define A16(x)   (((U16_S *)(x))->v)
-#define A32(x)   (((U32_S *)(x))->v)
-#define A64(x)   (((U64_S *)(x))->v)
-#define AARCH(x) (((size_t_S *)(x))->v)
+static U32 LZ4_read32(const void* memPtr)   /* native-order 32-bit load ; going through memcpy() makes it valid at any alignment */
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
 
+static reg_t LZ4_read_ARCH(const void* memPtr)   /* native-order load of sizeof(reg_t) bytes ; valid at any alignment thanks to memcpy() */
+{
+    reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
 
-//**************************************
-// Constants
-//**************************************
-#define LZ4_HASHLOG   (MEMORY_USAGE-2)
-#define HASHTABLESIZE (1 << MEMORY_USAGE)
-#define HASHNBCELLS4  (1 << LZ4_HASHLOG)
+static void LZ4_write16(void* memPtr, U16 value)   /* native-order 16-bit store ; valid at any alignment thanks to memcpy() */
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+static void LZ4_write32(void* memPtr, U32 value)   /* native-order 32-bit store ; valid at any alignment thanks to memcpy() */
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)   /* reads a 16-bit value stored low byte first, whatever the host byte order */
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read16(memPtr);   /* native order already is low byte first */
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));   /* p[0] is the low byte, p[1] the high byte */
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)   /* stores a 16-bit value low byte first, whatever the host byte order */
+{
+    if (LZ4_isLittleEndian()) {
+        LZ4_write16(memPtr, value);   /* native order already is low byte first */
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;        /* low byte goes to the lower address */
+        p[1] = (BYTE)(value>>8);    /* high byte follows */
+    }
+}
 
+static void LZ4_copy8(void* dst, const void* src)   /* copies exactly 8 bytes from src to dst ; memcpy() rules apply, so the two 8-byte ranges must not overlap */
+{
+    memcpy(dst,src,8);
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd (and reads equally far past the corresponding source position) */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);   /* do/while : the first 8-byte chunk is copied even when dstPtr >= dstEnd */
+}
+
+
+/*-************************************
+*  Common Constants
+**************************************/
 #define MINMATCH 4
 
-#define COPYLENGTH 8
+#define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
-const int LZ4_minLength = (MFLIMIT+1);
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
 
-#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
-#define SKIPSTRENGTH 6     // Increasing this value will make the compression run slower on incompressible data
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
 
 #define MAXD_LOG 16
 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
@@ -235,631 +285,1194 @@ const int LZ4_minLength = (MFLIMIT+1);
 #define RUN_BITS (8-ML_BITS)
 #define RUN_MASK ((1U<<RUN_BITS)-1)
 
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
 
+/*-************************************
+*  Error detection
+**************************************/
+#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* compile-time check : a false `c` turns the enumerator into a division by zero, which is not a valid constant expression. Expands to a block, so use only *after* variable declarations */
 
-//**************************************
-// Structures and local types
-//**************************************
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+#  define DEBUGLOG(l, ...) {                          \
+                if (l<=LZ4_DEBUG) {                   \
+                    fprintf(stderr, __FILE__ ": ");   \
+                    fprintf(stderr, __VA_ARGS__);     \
+                    fprintf(stderr, " \n");           \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
 
-typedef struct {
-    U32 hashTable[HASHNBCELLS4];
-    const BYTE* bufferStart;
-    const BYTE* base;
-    const BYTE* nextBlock;
-} LZ4_Data_Structure;
 
-typedef enum { notLimited = 0, limited = 1 } limitedOutput_directive;
-typedef enum { byPtr, byU32, byU16 } tableType_t;
+/*-************************************
+*  Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (register reg_t val)   /* counts the zero bytes of val starting from the lowest memory address : trailing zero bytes on little-endian, leading zero bytes on big-endian. LZ4_count() passes the XOR of two words, so the result is the length of their common prefix */
+{   /* precondition : val != 0 ; the bit-scan intrinsics and builtins used below leave their result undefined for 0 */
+    if (LZ4_isLittleEndian()) {
+        if (sizeof(val)==8) {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (unsigned)(r>>3);   /* r = index of the lowest set bit ; /8 gives whole zero bytes below it */
+#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        } else /* 32 bits */ {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;   /* initialised like the three sibling MSVC branches */
+            _BitScanForward( &r, (U32)val );
+            return (unsigned)(r>>3);
+#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    } else   /* Big Endian CPU */ {
+        if (sizeof(val)==8) {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)((63 - r) >> 3);   /* r = index of the highest set bit counted from bit 0, so 63-r is the leading-zero bit count (what __builtin_clzll returns below) */
+#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll((U64)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        } else /* 32 bits */ {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)((31 - r) >> 3);   /* 31-r is the leading-zero bit count, matching __builtin_clz below */
+#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
 
-typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
+#define STEPSIZE sizeof(reg_t)
+static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)   /* returns how many consecutive bytes are equal at pIn and pMatch ; pIn is never read at or beyond pInLimit */
+{
+    const BYTE* const pStart = pIn;
 
-typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
-typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+    while (likely(pIn<pInLimit-(STEPSIZE-1))) {   /* fast path : compare one reg_t-sized word per iteration */
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);   /* the first mismatch lies inside this word */
+        return (unsigned)(pIn - pStart);
+    }
 
+    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }   /* tail : fewer than STEPSIZE bytes remain, finish with 4-, 2- and 1-byte compares */
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
 
-//**************************************
-// Architecture-specific macros
-//**************************************
-#define STEPSIZE                  sizeof(size_t)
-#define LZ4_COPYSTEP(d,s)         { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; }
-#define LZ4_COPY8(d,s)            { LZ4_COPYSTEP(d,s); if (STEPSIZE<8) LZ4_COPYSTEP(d,s); }
-#define LZ4_SECURECOPY(d,s,e)     { if ((STEPSIZE==4)||(d<e)) LZ4_WILDCOPY(d,s,e); }
-
-#if LZ4_ARCH64   // 64-bit
-#  define HTYPE                   U32
-#  define INITBASE(base)          const BYTE* const base = ip
-#else            // 32-bit
-#  define HTYPE                   const BYTE*
-#  define INITBASE(base)          const int base = 0
-#endif
 
-#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
-#else      // Little Endian
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
-#endif
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+*  Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increasing this value makes compression run slower on incompressible data */
 
 
-//**************************************
-// Macros
-//**************************************
-#define LZ4_WILDCOPY(d,s,e)     { do { LZ4_COPY8(d,s) } while (d<e); }           // at the end, d>=e;
-
-
-//****************************
-// Private functions
-//****************************
-#if LZ4_ARCH64
-
-FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
-{
-# if defined(LZ4_BIG_ENDIAN)
-#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse64( &r, val );
-    return (int)(r>>3);
-#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clzll(val) >> 3);
-#   else
-    int r;
-    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
-    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-    r += (!val);
-    return r;
-#   endif
-# else
-#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanForward64( &r, val );
-    return (int)(r>>3);
-#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctzll(val) >> 3);
-#   else
-    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
-    return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-#   endif
-# endif
-}
+/*-************************************
+*  Local Structures and types
+**************************************/
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#else
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
-{
-# if defined(LZ4_BIG_ENDIAN)
-#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse( &r, val );
-    return (int)(r>>3);
-#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clz(val) >> 3);
-#   else
-    int r;
-    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-    r += (!val);
-    return r;
-#   endif
-# else
-#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanForward( &r, val );
-    return (int)(r>>3);
-#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctz(val) >> 3);
-#   else
-    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
-    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-#   endif
-# endif
-}
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
 
-#endif
+
+/*-************************************
+*  Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }   /* value of LZ4_VERSION_NUMBER this file was compiled with */
+const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }   /* value of LZ4_VERSION_STRING this file was compiled with */
+int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }   /* function form of the LZ4_COMPRESSBOUND() macro */
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }   /* (void), not () : gives the definition a real prototype, like its siblings above */
 
 
-//****************************
-// Compression functions
-//****************************
-FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType)
+/*-******************************
+*  Compression functions
+********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)   /* multiplicative hash of a 32-bit value ; keeps the top LZ4_HASHLOG bits of the product, one bit more for byU16 tables */
 {
     if (tableType == byU16)
-        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));   /* MINMATCH*8 is the 32-bit width of the product */
+    else
+        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)   /* hashes 5 of the 8 bytes held in `sequence` ; result has hashLog significant bits */
+{
+    static const U64 prime5bytes = 889523592379ULL;
+    static const U64 prime8bytes = 11400714785074694791ULL;
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;   /* byU16 tables get one extra index bit */
+    if (LZ4_isLittleEndian())
+        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));   /* << 24 drops the 3 high-order bytes, i.e. the 3 highest-address bytes on a little-endian host */
     else
-        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));   /* >> 24 drops the 3 low-order bytes, i.e. the 3 highest-address bytes on a big-endian host */
 }
 
-FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }
+FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)   /* hashes the bytes found at p into a table index for the given table type */
+{
+    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);   /* 8-byte reg_t and not a byU16 table : load a full word and use LZ4_hash5 */
+    return LZ4_hash4(LZ4_read32(p), tableType);   /* otherwise : hash the 4 bytes at p */
+}
 
-FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)   /* records position p in slot h of the table at tableBase, encoded according to tableType */
 {
     switch (tableType)
     {
-    case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; }
-    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; }
-    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }   /* slot holds the pointer itself ; srcBase unused */
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }   /* slot holds the offset from srcBase, truncated to 32 bits */
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }   /* slot holds the offset from srcBase, truncated to 16 bits */
     }
 }
 
 FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
 {
-    U32 h = LZ4_hashPosition(p, tableType);
+    U32 const h = LZ4_hashPosition(p, tableType);   /* table slot chosen from the bytes found at p */
     LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }
 
-FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)   /* returns the position stored in slot h, decoded according to tableType */
 {
     if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
-    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
-    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   // default, to ensure a return
+    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }   /* slot holds a 32-bit offset from srcBase */
+    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return : slot holds a 16-bit offset from srcBase */
 }
 
 FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
 {
-    U32 h = LZ4_hashPosition(p, tableType);
+    U32 const h = LZ4_hashPosition(p, tableType);   /* same slot LZ4_putPosition() would pick for the bytes at p */
     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }
 
 
+/** LZ4_compress_generic() :
+    inlined, to ensure branches are decided at compilation time */
 FORCE_INLINE int LZ4_compress_generic(
-                 void* ctx,
-                 const char* source,
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize,
-
-                 limitedOutput_directive limitedOutput,
-                 tableType_t tableType,
-                 prefix64k_directive prefix)
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputLimited,
+                 const tableType_t tableType,
+                 const dict_directive dict,
+                 const dictIssue_directive dictIssue,
+                 const U32 acceleration)
 {
     const BYTE* ip = (const BYTE*) source;
-    const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source;
-    const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source);
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - cctx->dictSize;
+    const BYTE* const dictionary = cctx->dictionary;
+    const BYTE* const dictEnd = dictionary + cctx->dictSize;
+    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
     const BYTE* anchor = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
     const BYTE* const mflimit = iend - MFLIMIT;
     const BYTE* const matchlimit = iend - LASTLITERALS;
 
     BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
+    BYTE* const olimit = op + maxOutputSize;
 
-    int length;
-    const int skipStrength = SKIPSTRENGTH;
     U32 forwardH;
 
-    // Init conditions
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;                                // Unsupported input size, too large (or negative)
-    if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0;   // must continue from end of previous block
-    if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend;                    // do it now, due to potential early exit
-    if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0;                       // Size too large (not within 64K limit)
-    if (inputSize<LZ4_minLength) goto _last_literals;                                      // Input too small, no compression (all literals)
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - cctx->currentOffset;
+        lowLimit = (const BYTE*)source - cctx->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - cctx->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
-    // First Byte
-    LZ4_putPosition(ip, ctx, tableType, base);
+    /* First Byte */
+    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
     ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
+    /* Main Loop */
+    for ( ; ; ) {
+        ptrdiff_t refDelta = 0;
+        const BYTE* match;
         BYTE* token;
 
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if unlikely(forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_hashPosition(forwardIp, tableType);
-            ref = LZ4_getPositionOnHash(h, ctx, tableType, base);
-            LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
-
-        } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip)));
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+                if (dict==usingExtDict) {
+                    if (match < (const BYTE*)source) {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    } else {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                }   }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
 
-        // Catch up
-        while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
+        /* Catch up */
+        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        /* Encode Literals */
+        {   unsigned const litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;
+            if (litLength >= RUN_MASK) {
+                int len = (int)litLength-RUN_MASK;
+                *token = (RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
 
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-        if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend)) return 0;   // Check output limit
-        if (length>=(int)RUN_MASK)
-        {
-            int len = length-RUN_MASK;
-            *token=(RUN_MASK<<ML_BITS);
-            for(; len >= 255 ; len-=255) *op++ = 255;
-            *op++ = (BYTE)len;
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
         }
-        else *token = (BYTE)(length<<ML_BITS);
-
-        // Copy Literals
-        { BYTE* end=(op)+(length); LZ4_WILDCOPY(op,anchor,end); op=end; }
 
 _next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   unsigned matchCode;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchCode;
+                if (ip==limit) {
+                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchCode += more;
+                    ip += more;
+                }
+            } else {
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchCode;
+            }
 
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
-        anchor = ip;
-        while likely(ip<matchlimit-(STEPSIZE-1))
-        {
-            size_t diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
+            if ( outputLimited &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
+                return 0;
+            if (matchCode >= ML_MASK) {
+                *token += ML_MASK;
+                matchCode -= ML_MASK;
+                LZ4_write32(op, 0xFFFFFFFF);
+                while (matchCode >= 4*255) op+=4, LZ4_write32(op, 0xFFFFFFFF), matchCode -= 4*255;
+                op += matchCode / 255;
+                *op++ = (BYTE)(matchCode % 255);
+            } else
+                *token += (BYTE)(matchCode);
         }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        length = (int)(ip - anchor);
-        if ((limitedOutput) && unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)) return 0;    // Check output limit
-        if (length>=(int)ML_MASK)
-        {
-            *token += ML_MASK;
-            length -= ML_MASK;
-            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; }
-            if (length >= 255) { length-=255; *op++ = 255; }
-            *op++ = (BYTE)length;
-        }
-        else *token += (BYTE)(length);
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
 
-        // Fill table
-        LZ4_putPosition(ip-2, ctx, tableType, base);
-
-        // Test next position
-        ref = LZ4_getPosition(ip, ctx, tableType, base);
-        LZ4_putPosition(ip, ctx, tableType, base);
-        if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
+        anchor = ip;
 
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_hashPosition(ip, tableType);
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+        if (dict==usingExtDict) {
+            if (match < (const BYTE*)source) {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            } else {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+        }   }
+        LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
     }
 
 _last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;   // Check output limit
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
+    /* Encode Last Literals */
+    {   size_t const lastRun = (size_t)(iend - anchor);
+        if ( (outputLimited) &&  /* Check output buffer overflow */
+            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
+            return 0;
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRun);
+        op += lastRun;
     }
 
-    // End
+    /* End */
     return (int) (((char*)op)-dest);
 }
 
 
-int LZ4_compress(const char* source, char* dest, int inputSize)
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-#if (HEAPMODE)
-    void* ctx = ALLOCATOR(HASHNBCELLS4, 4);   // Aligned on 4-bytes boundaries
-#else
-    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};      // Ensure data is aligned on 4-bytes boundaries
-#endif
-    int result;
-
-    if (inputSize < (int)LZ4_64KLIMIT)
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noPrefix);
-    else
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);
-
-#if (HEAPMODE)
-    FREEMEM(ctx);
-#endif
-    return result;
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    LZ4_resetStream((LZ4_stream_t*)state);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited,                        byU16, noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    } else {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput,                        byU16, noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
 }
 
-int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-#if (HEAPMODE)
-    void* ctx = ALLOCATOR(HASHNBCELLS4, 4);   // Aligned on 4-bytes boundaries
+#if (LZ4_HEAPMODE)
+    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned; may be NULL when the allocation fails */
 #else
-    U32 ctx[1U<<(MEMORY_USAGE-2)] = {0};      // Ensure data is aligned on 4-bytes boundaries
+    LZ4_stream_t ctx;
+    void* const ctxPtr = &ctx;
 #endif
-    int result;
 
-    if (inputSize < (int)LZ4_64KLIMIT)
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, byU16, noPrefix);
-    else
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);
+    int const result = (ctxPtr == NULL) ? 0 : LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);   /* no context : report failure (0) instead of dereferencing NULL */
 
-#if (HEAPMODE)
-    FREEMEM(ctx);
+#if (LZ4_HEAPMODE)
+    if (ctxPtr != NULL) FREEMEM(ctxPtr);   /* same NULL guard as LZ4_freeStream() */
 #endif
     return result;
 }
 
 
-//*****************************
-// Using an external allocation
-//*****************************
-
-int LZ4_sizeofState() { return 1 << MEMORY_USAGE; }
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
+}
 
 
-int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize)
+/* hidden debug function */
+/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
+int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-    if (((size_t)(state)&3) != 0) return 0;   // Error : state is not aligned on 4-bytes boundary
-    MEM_INIT(state, 0, LZ4_sizeofState());
+    LZ4_stream_t ctx;
+    LZ4_resetStream(&ctx);
 
-    if (inputSize < (int)LZ4_64KLIMIT)
-        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noPrefix);
+    if (inputSize < LZ4_64Klimit)
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
     else
-        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
 }
 
 
-int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+/*-******************************
+*  *_destSize() variant
+********************************/
+
+static int LZ4_compress_destSize_generic(
+                       LZ4_stream_t_internal* const ctx,
+                 const char* const src,
+                       char* const dst,
+                       int*  const srcSizePtr,
+                 const int targetDstSize,
+                 const tableType_t tableType)
 {
-    if (((size_t)(state)&3) != 0) return 0;   // Error : state is not aligned on 4-bytes boundary
-    MEM_INIT(state, 0, LZ4_sizeofState());
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* base = (const BYTE*) src;
+    const BYTE* lowLimit = (const BYTE*) src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
 
-    if (inputSize < (int)LZ4_64KLIMIT)
-        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limited, byU16, noPrefix);
-    else
-        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limited, (sizeof(void*)==8) ? byU32 : byPtr, noPrefix);
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + targetDstSize;
+    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
+    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
+    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
+
+    U32 forwardH;
+
+
+    /* Init conditions */
+    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
+    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    *srcSizePtr = 0;
+    LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
+
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base);
+
+            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        /* Encode Literal length */
+        {   unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
+                /* Not enough space for a last match */
+                op--;
+                goto _last_literals;
+            }
+            if (litLength>=RUN_MASK) {
+                unsigned len = litLength - RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op += litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+
+            if (op + ((matchLength+240)/255) > oMaxMatch) {
+                /* Match description too long : reduce it */
+                matchLength = (15-1) + (oMaxMatch-op) * 255;
+            }
+            ip += MINMATCH + matchLength;
+
+            if (matchLength>=ML_MASK) {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of block */
+        if (ip > mflimit) break;
+        if (op > oMaxSeq) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx->hashTable, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+        LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+        if ( (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);
+        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (oend-op) - 1;
+            lastRunSize -= (lastRunSize+240)/255;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip)-src);
+    return (int) (((char*)op)-dst);
 }
 
 
-//****************************
-// Stream functions
-//****************************
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    LZ4_resetStream(state);   /* clears the whole state before either path below */
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);   /* *srcSizePtr is not modified on this path; the callee resets the state once more */
+    } else {
+        if (*srcSizePtr < LZ4_64Klimit)
+            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16);   /* byU16 is only accepted below LZ4_64Klimit (checked again by the generic routine) */
+        else
+            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, sizeof(void*)==8 ? byU32 : byPtr);   /* table type chosen from pointer width */
+    }
+}
+
 
-int LZ4_sizeofStreamState()
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-    return sizeof(LZ4_Data_Structure);
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned; may be NULL when the allocation fails */
+#else
+    LZ4_stream_t ctxBody;
+    LZ4_stream_t* ctx = &ctxBody;
+#endif
+
+    int result = (ctx == NULL) ? 0 : LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);   /* no context : return 0, nothing written, *srcSizePtr left untouched */
+
+#if (LZ4_HEAPMODE)
+    if (ctx != NULL) FREEMEM(ctx);   /* same NULL guard as LZ4_freeStream() */
+#endif
+    return result;
 }
 
-FORCE_INLINE void LZ4_init(LZ4_Data_Structure* lz4ds, const BYTE* base)
+
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
 {
-    MEM_INIT(lz4ds->hashTable, 0, sizeof(lz4ds->hashTable));
-    lz4ds->bufferStart = base;
-    lz4ds->base = base;
-    lz4ds->nextBlock = base;
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);   /* may be NULL when the allocation fails */
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    if (lz4s != NULL) LZ4_resetStream(lz4s);   /* LZ4_resetStream() runs MEM_INIT on its argument : never hand it NULL */
+    return lz4s;   /* NULL on allocation failure; otherwise to be released with LZ4_freeStream() */
 }
 
-int LZ4_resetStreamState(void* state, const char* inputBuffer)
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
-    if ((((size_t)state) & 3) != 0) return 1;   // Error : pointer is not aligned on 4-bytes boundary
-    LZ4_init((LZ4_Data_Structure*)state, (const BYTE*)inputBuffer);
-    return 0;
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
 }
 
-void* LZ4_create (const char* inputBuffer)
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
 {
-    void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure));
-    LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer);
-    return lz4ds;
+    if (!LZ4_stream) return 0;   /* support free on NULL */
+    FREEMEM(LZ4_stream);
+    return (0);
 }
 
 
-int LZ4_free (void* LZ4_Data)
+#define HASH_UNIT sizeof(reg_t)   /* smallest dictionary accepted below; also bounds the indexing loop */
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
 {
-    FREEMEM(LZ4_Data);
-    return (0);
+    LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+    const BYTE* p = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = p + dictSize;
+    const BYTE* base;
+
+    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
+        LZ4_resetStream(LZ4_dict);
+
+    if (dictSize < (int)HASH_UNIT) {   /* too small to index (negative sizes included) : stream is left without dictionary */
+        dict->dictionary = NULL;
+        dict->dictSize = 0;
+        return 0;
+    }
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;   /* only the last 64 KB are retained */
+    dict->currentOffset += 64 KB;
+    base = p - dict->currentOffset;   /* so that (p - base) equals the current stream offset */
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += dict->dictSize;   /* offset now accounts for the retained dictionary bytes */
+
+    while (p <= dictEnd-HASH_UNIT) {
+        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        p+=3;   /* one position out of every three is indexed */
+    }
+
+    return dict->dictSize;   /* number of dictionary bytes retained */
 }
 
 
-char* LZ4_slideInputBuffer (void* LZ4_Data)
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
 {
-    LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data;
-    size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB);
+    if ((LZ4_dict->currentOffset > 0x80000000) ||   /* offset counter went past 2^31 */
+        ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {   /* address space overflow */
+        /* rescale hash table */
+        U32 const delta = LZ4_dict->currentOffset - 64 KB;   /* shift that brings currentOffset back to 64 KB */
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;   /* captured before dictSize is clamped below */
+        int i;
+        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;   /* subtraction would underflow : entry reset to 0 */
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;   /* keep at most 64 KB of dictionary */
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;   /* dictionary end stays fixed, start moves forward */
+    }
+}
 
-    if ( (lz4ds->base - delta > lz4ds->base)                          // underflow control
-       || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) )   // close to 32-bits limit
-    {
-        size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base;
-        int nH;
 
-        for (nH=0; nH < HASHNBCELLS4; nH++)
-        {
-            if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0;
-            else lz4ds->hashTable[nH] -= (U32)deltaLimit;
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
         }
-        memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB);
-        lz4ds->base = lz4ds->bufferStart;
-        lz4ds->nextBlock = lz4ds->base + 64 KB;
     }
-    else
-    {
-        memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB);
-        lz4ds->nextBlock -= delta;
-        lz4ds->base -= delta;
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source) {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
     }
 
-    return (char*)(lz4ds->nextBlock);
+    /* external dictionary mode */
+    {   int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
 }
 
 
-int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize)
+/* Hidden debug function, to force external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
 {
-    return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, 0, notLimited, byU32, withPrefix);
+    LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT(streamPtr, smallest);
+
+    result = LZ4_compress_generic(streamPtr, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
 }
 
 
-int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+/*! LZ4_saveDict() :
+ *  If previously compressed data block is not guaranteed to remain available at its memory location,
+ *  save its tail into a safer place (char* safeBuffer); the stream then references safeBuffer as its dictionary.
+ *  At most 64 KB are saved, and never more than the dictionary size currently recorded in the stream.
+ *  Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
+ *  Return : saved dictionary size in bytes (0 when there is nothing to save; nothing is copied in that case).
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
 {
-    return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, maxOutputSize, limited, byU32, withPrefix);
+    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
+    const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB (a negative request also lands here through the U32 cast) */
+    if ((U32)dictSize > dict->dictSize) dictSize = (int)dict->dictSize;   /* cannot save more than the stream currently references */
+
+    if (dictSize > 0) memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);   /* skip the empty copy : memmove() requires valid pointers even for a length of 0 */
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
 }
 
 
-//****************************
-// Decompression functions
-//****************************
 
-// This generic decompression function cover all use cases.
-// It shall be instanciated several times, using different sets of directives
-// Note that it is essential this generic function is really inlined,
-// in order to remove useless branches during compilation optimisation.
+/*-*****************************
+*  Decompression functions
+*******************************/
+/*! LZ4_decompress_generic() :
+ *  This generic decompression function covers all use cases.
+ *  It shall be instantiated several times, using different sets of directives.
+ *  Note that it is important this generic function is really inlined,
+ *  in order to remove useless branches during compilation optimization.
+ */
 FORCE_INLINE int LZ4_decompress_generic(
-                 const char* source,
-                 char* dest,
-                 int inputSize,          //
-                 int outputSize,         // If endOnInput==endOnInputSize, this value is the max size of Output Buffer.
-
-                 int endOnInput,         // endOnOutputSize, endOnInputSize
-                 int prefix64k,          // noPrefix, withPrefix
-                 int partialDecoding,    // full, partial
-                 int targetOutputSize    // only used if partialDecoding==partial
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
                  )
 {
-    // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* ref;
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
 
     BYTE* op = (BYTE*) dest;
     BYTE* const oend = op + outputSize;
     BYTE* cpy;
     BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
 
-    const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};   // static reduces speed for LZ4_decompress_safe() on GCC64
-    static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
 
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
 
-    // Special cases
-    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                        // targetOutputSize too high => decode everything
-    if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 0 : -1;   // Empty output buffer
-    if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1);
 
+    /* Special cases */
+    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
 
-    // Main Loop
-    while (1)
-    {
-        unsigned token;
+    /* Main Loop : decode sequences */
+    while (1) {
         size_t length;
-
-        // get runlength
-        token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK)
-        {
-            unsigned s=255;
-            while (((endOnInput)?ip<iend:1) && (s==255))
-            {
+        const BYTE* match;
+        size_t offset;
+
+        /* get literal length */
+        unsigned const token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+            unsigned s;
+            do {
                 s = *ip++;
                 length += s;
-            }
+            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
+            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
         }
 
-        // copy literals
+        /* copy literals */
         cpy = op+length;
-        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
-            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
         {
-            if (partialDecoding)
-            {
-                if (cpy > oend) goto _output_error;                           // Error : write attempt beyond end of output buffer
-                if ((endOnInput) && (ip+length > iend)) goto _output_error;   // Error : read attempt beyond end of input buffer
-            }
-            else
-            {
-                if ((!endOnInput) && (cpy != oend)) goto _output_error;       // Error : block decoding must stop exactly there
-                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   // Error : input must be consumed
+            if (partialDecoding) {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            } else {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
             }
             memcpy(op, ip, length);
             ip += length;
             op += length;
-            break;                                       // Necessarily EOF, due to parsing restrictions
+            break;     /* Necessarily EOF, due to parsing restrictions */
         }
-        LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;
-
-        // get offset
-        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
-        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error;   // Error : offset outside destination buffer
-
-        // get matchlength
-        if ((length=(token&ML_MASK)) == ML_MASK)
-        {
-            while ((!endOnInput) || (ip<iend-(LASTLITERALS+1)))   // Ensure enough bytes remain for LASTLITERALS + token
-            {
-                unsigned s = *ip++;
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
                 length += s;
-                if (s==255) continue;
-                break;
-            }
+            } while (s==255);
+            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
         }
-
-        // copy repeated sequence
-        if unlikely((op-ref)<(int)STEPSIZE)
-        {
-            const size_t dec64 = dec64table[(sizeof(void*)==4) ? 0 : op-ref];
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4, ref += 4; ref -= dec32table[op-ref];
-            A32(op) = A32(ref);
-            op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(op,ref); }
-        cpy = op + length - (STEPSIZE-4);
-
-        if unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4))
-        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH));
-            while(op<cpy) *op++=*ref++;
-            op=cpy;
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix)) {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match)) {
+                /* match can be copied as a single segment from external dictionary */
+                memmove(op, dictEnd - (lowPrefix-match), length);
+                op += length;
+            } else {
+                /* match encompasses both the external dictionary and the current block */
+                size_t const copySize = (size_t)(lowPrefix-match);
+                size_t const restSize = length - copySize;
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                    BYTE* const endOfMatch = op + restSize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                } else {
+                    memcpy(op, lowPrefix, restSize);
+                    op += restSize;
+            }   }
             continue;
         }
-        LZ4_WILDCOPY(op, ref, cpy);
-        op=cpy;   // correction
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8)) {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12)) {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit) {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        } else {
+            LZ4_copy8(op, match);
+            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+        }
+        op=cpy;   /* correction */
     }
 
-    // end of decoding
+    /* end of decoding */
     if (endOnInput)
-       return (int) (((char*)op)-dest);     // Nb of output bytes decoded
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
     else
-       return (int) (((char*)ip)-source);   // Nb of input bytes read
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
 
-    // Overflow error detected
+    /* Overflow error detected */
 _output_error:
-    return (int) (-(((char*)ip)-source))-1;
+    return (int) (-(((const char*)ip)-source))-1;
 }
 
 
-int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, full, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
 }
 
-int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, withPrefix, full, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
 }
 
-int LZ4_decompress_safe_partial(const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize)
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
 {
-    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, partial, targetOutputSize);
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
 }
 
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize)
+
+/*===== streaming decompression functions =====*/
+
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
-    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    return lz4s;
 }
 
-int LZ4_decompress_fast(const char* source, char* dest, int outputSize)
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
 {
-#ifdef _MSC_VER   // This version is faster with Visual
-    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0);
-#else
-    return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
-#endif
+    if (!LZ4_stream) return 0;   /* support free on NULL */
+    FREEMEM(LZ4_stream);
+    return 0;
+}
+
+/*!
+ * LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 (this implementation has no failure path and never returns 0)
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    lz4sd->prefixSize = (size_t) dictSize;   /* the supplied dictionary is recorded as the current prefix */
+    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;   /* one past the last dictionary byte */
+    lz4sd->externalDict = NULL;   /* any previously tracked external dictionary is forgotten */
+    lz4sd->extDictSize  = 0;
+    return 1;
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode().
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;   /* value returned by LZ4_decompress_generic(); values <= 0 are handed back to the caller unchanged */
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {   /* dest directly follows the previous output : keep extending the same prefix */
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* stream state is left untouched on this path */
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    } else {   /* dest is elsewhere : the previous prefix becomes the external dictionary */
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* note : externalDict / extDictSize have already been updated above */
+        lz4sd->prefixSize = result;   /* the new prefix is exactly the block just decoded */
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)   /* same bookkeeping as LZ4_decompress_safe_continue(), in endOnOutputSize mode */
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;   /* value returned by LZ4_decompress_generic(); values <= 0 are handed back to the caller unchanged */
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {   /* dest directly follows the previous output : keep extending the same prefix */
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* stream state is left untouched on this path */
+        lz4sd->prefixSize += originalSize;   /* the prefix grows by originalSize, not by result */
+        lz4sd->prefixEnd  += originalSize;
+    } else {   /* dest is elsewhere : the previous prefix becomes the external dictionary */
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* note : externalDict / extDictSize have already been updated above */
+        lz4sd->prefixSize = originalSize;   /* the new prefix is exactly the block just decoded */
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    except that the dictionary must be explicitly provided within parameters.
+*/
+
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+    if (dictSize==0)   /* no dictionary at all */
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    if (dictStart+dictSize == dest) {   /* dictionary ends exactly where dest begins : handle it as a prefix */
+        if (dictSize >= (int)(64 KB - 1))   /* large contiguous dictionary : prefix mode with lowPrefix fixed at dest - 64 KB */
+            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);   /* smaller contiguous dictionary : lowPrefix is the dictionary start */
+    }
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);   /* dictionary lives elsewhere : external dictionary mode */
+}
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+
+/*=*************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
+int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+
+/*
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }   /* obsolete : reports LZ4_STREAMSIZE; (void) makes this definition a real prototype */
+
+static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base)   /* resets a stream state and binds it to the caller's input buffer */
+{
+    MEM_INIT(lz4ds, 0, sizeof(LZ4_stream_t));   /* fill the whole state with 0 (MEM_INIT is defined elsewhere, presumably memset-like) */
+    lz4ds->internal_donotuse.bufferStart = base;   /* read back later by LZ4_slideInputBuffer() */
+}
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)   /* returns 0 on success, 1 when `state` is rejected */
+{
+    if ((((uptrval)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
+    LZ4_init((LZ4_stream_t*)state, (BYTE*)inputBuffer);   /* caller-provided memory is reinitialized in place */
+    return 0;
+}
+
+void* LZ4_create (char* inputBuffer)   /* obsolete : allocates a stream state bound to inputBuffer; returns NULL if the allocation fails */
+{
+    LZ4_stream_t* lz4ds = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* one state is enough, same count as LZ4_createStreamDecode() */
+    if (lz4ds != NULL) LZ4_init (lz4ds, (BYTE*)inputBuffer);   /* never initialize through a failed allocation */
+    return lz4ds;
+}
+
+char* LZ4_slideInputBuffer (void* LZ4_Data)   /* obsolete : moves the current dictionary to the start of the input buffer */
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)LZ4_Data)->internal_donotuse;
+    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);   /* save up to 64 KB of dictionary at bufferStart */
+    return (char*)(ctx->bufferStart + dictSize);   /* first byte located right after the saved dictionary */
+}
+
+/* Obsolete streaming decompression functions */
+
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
 }
 
+#endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/lz4/lz4.h b/lz4/lz4.h
index af05dbc..86ca0d5 100644
--- a/lz4/lz4.h
+++ b/lz4/lz4.h
@@ -1,7 +1,8 @@
 /*
-   LZ4 - Fast LZ compression algorithm
-   Header File
-   Copyright (C) 2011-2013, Yann Collet.
+ *  LZ4 - Fast LZ compression algorithm
+ *  Header File
+ *  Copyright (C) 2011-2017, Yann Collet.
+
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -28,220 +29,433 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
 */
-#pragma once
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
 
-//**************************************
-// Compiler Options
-//**************************************
-#if defined(_MSC_VER) && !defined(__cplusplus)   // Visual Studio
-#  define inline __inline           // Visual C is not C99, but supports some kind of inline
-#endif
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
 
 
-//****************************
-// Simple Functions
-//****************************
+/**
+  Introduction
 
-int LZ4_compress        (const char* source, char* dest, int inputSize);
-int LZ4_decompress_safe (const char* source, char* dest, int inputSize, int maxOutputSize);
+  LZ4 is a lossless compression algorithm, providing compression speed at 400 MB/s per core,
+  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
 
-/*
-LZ4_compress() :
-    Compresses 'inputSize' bytes from 'source' into 'dest'.
-    Destination buffer must be already allocated,
-    and must be sized to handle worst cases situations (input data not compressible)
-    Worst case size evaluation is provided by function LZ4_compressBound()
-    inputSize : Max supported value is LZ4_MAX_INPUT_VALUE
-    return : the number of bytes written in buffer dest
-             or 0 if the compression fails
-
-LZ4_decompress_safe() :
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits (never writes outside of output buffer, and never reads outside of input buffer). Therefore, it is protected against malicious data packets
-*/
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
 
+  lz4.h provides block compression functions. It gives full buffer control to user.
+  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
+  Each application is free to encode such metadata in whichever way it wants.
 
-//****************************
-// Advanced Functions
-//****************************
-#define LZ4_MAX_INPUT_SIZE        0x7E000000   // 2 113 929 216 bytes
-#define LZ4_COMPRESSBOUND(isize)  ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
-static inline int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
+  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
+  takes care of encoding standard metadata alongside LZ4-compressed blocks.
+  If your application requires interoperability, it's recommended to use it.
+  A library is provided to take care of it, see lz4frame.h.
+*/
 
+/*^***************************************************************
+*  Export parameters
+*****************************************************************/
 /*
-LZ4_compressBound() :
-    Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
-    primarily useful for memory allocation of output buffer.
-    inline function is recommended for the general case,
-    macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation).
-
-    isize  : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
-    return : maximum output size in a "worst case" scenario
-             or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*  LZ4_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_API :
+*  Control library symbols visibility.
 */
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4LIB_API __declspec(dllexport)
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4LIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#elif defined(__GNUC__) && (__GNUC__ >= 4)
+#  define LZ4LIB_API __attribute__ ((__visibility__ ("default")))
+#else
+#  define LZ4LIB_API
+#endif
 
 
-int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+/*------   Version   ------*/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
 
-/*
-LZ4_compress_limitedOutput() :
-    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-    If it cannot achieve it, compression will stop, and result of the function will be zero.
-    This function never writes outside of provided output buffer.
-
-    inputSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes written in buffer 'dest'
-             or 0 if the compression fails
-*/
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
 
-int LZ4_decompress_fast (const char* source, char* dest, int outputSize);
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; to be used when checking dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; to be used when checking dll version */
 
-/*
-LZ4_decompress_fast() :
-    outputSize : is the original (uncompressed) size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is malformed, the function will stop decoding and return a negative result.
-    note : This function is a bit faster than LZ4_decompress_safe()
-           This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet.
-           Use this function preferably into a trusted environment (data to decode comes from a trusted source).
-           Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes.
-*/
 
-int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize);
+/*-************************************
+*  Tuning parameter
+**************************************/
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
 
-/*
-LZ4_decompress_safe_partial() :
-    This function decompress a compressed block of size 'inputSize' at position 'source'
-    into output buffer 'dest' of size 'maxOutputSize'.
-    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
-    reducing decompression time.
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
-       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
-             Always control how many bytes were decoded.
+/*-************************************
+*  Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor reads outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_SIZE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxDestSize)
+              or 0 if compression fails */
+LZ4LIB_API int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+
+/*! LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
 */
+LZ4LIB_API int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 
 
-//*****************************
-// Using an external allocation
-//*****************************
-int LZ4_sizeofState();
-int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-
-/*
-These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods.
-To know how much memory must be allocated for the compression tables, use :
-int LZ4_sizeofState();
-
-Note that tables must be aligned on 4-bytes boundaries, otherwise compression will fail (return code 0).
+/*-************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
 
-The allocated memory can be provided to the compressions functions using 'void* state' parameter.
-LZ4_compress_withState() and LZ4_compress_limitedOutput_withState() are equivalent to previously described functions.
-They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
+/*!
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compresses faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
 */
-
-
-//****************************
-// Streaming Functions
-//****************************
-
-void* LZ4_create (const char* inputBuffer);
-int   LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize);
-int   LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize);
-char* LZ4_slideInputBuffer (void* LZ4_Data);
-int   LZ4_free (void* LZ4_Data);
-
-/*
-These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks.
-In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function :
-
-void* LZ4_create (const char* inputBuffer);
-The result of the function is the (void*) pointer on the LZ4 Data Structure.
-This pointer will be needed in all other functions.
-If the pointer returned is NULL, then the allocation has failed, and compression must be aborted.
-The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
-
-All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'.
-To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue().
-Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(),
-but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one.
-If next block does not begin immediately after the previous one, the compression will fail (return 0).
-
-When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to :
-char* LZ4_slideInputBuffer(void* LZ4_Data);
-must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer.
-Note that, for this function to work properly, minimum size of an input buffer must be 192KB.
-==> The memory position where the next input data block must start is provided as the result of the function.
-
-Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual.
-
-When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure.
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*!
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
 */
+LZ4LIB_API int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
 
-int LZ4_sizeofStreamState();
-int LZ4_resetStreamState(void* state, const char* inputBuffer);
-
-/*
-These functions achieve the same result as :
-void* LZ4_create (const char* inputBuffer);
-
-They are provided here to allow the user program to allocate memory using its own routines.
-
-To know how much space must be allocated, use LZ4_sizeofStreamState();
-Note also that space must be 4-bytes aligned.
 
-Once space is allocated, you must initialize it using : LZ4_resetStreamState(void* state, const char* inputBuffer);
-void* state is a pointer to the space allocated.
-It must be aligned on 4-bytes boundaries, and be large enough.
-The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
-
-The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState().
-return value of LZ4_resetStreamState() must be 0 is OK.
-Any other value means there was an error (typically, pointer is not aligned on 4-bytes boundaries).
+/*!
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and allocate it on 8-bytes boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
 */
-
-
-int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize);
-int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize);
-
-/*
-*_withPrefix64k() :
-    These decoding functions work the same as their "normal name" versions,
-    but can use up to 64KB of data in front of 'char* dest'.
-    These functions are necessary to decode inter-dependant blocks.
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+
+
+/*!
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fills 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes were read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
 */
+LZ4LIB_API int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
 
 
-//****************************
-// Obsolete Functions
-//****************************
-
-static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
-static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+/*!
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respects memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+LZ4LIB_API int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
 
-/*
-These functions are deprecated and should no longer be used.
-They are provided here for compatibility with existing user programs.
+/*!
+LZ4_decompress_safe_partial() :
+    This function decompresses a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
 */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*-*********************************************
+*  Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStream() and LZ4_freeStream() :
+ *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
+ *  LZ4_freeStream() releases its memory.
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
+ *  Use this function to init an allocated `LZ4_stream_t` structure and start a new compression.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ *  Use this function to load a static dictionary into LZ4_stream.
+ *  Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ *  Loading a size of 0 is allowed.
+ *  Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ *  Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
+ *  Important : Previous data blocks are assumed to remain present and unmodified !
+ *  'dst' buffer must be already allocated.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *  If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function @return==0.
+ *  After an error, the stream status is invalid, it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ *  If previously compressed data block is not guaranteed to remain available at its current memory location,
+ *  save it into a safer place (char* safeBuffer).
+ *  Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
+ *  @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+
+
+/*-**********************************************
+*  Streaming Decompression Functions
+*  Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  creation / destruction of streaming decompression tracking structure */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ *  Use this function to instruct where to find the dictionary.
+ *  Setting a size of 0 is allowed (same effect as reset).
+ *  @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of multiple blocks in "streaming" mode.
+ *  Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+ *  In the case of ring buffers, decoding buffer must be either :
+ *  - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+ *    In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+ *  - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including larger than decoding buffer.
+ *  Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+ *  and indicate where it is saved using LZ4_setStreamDecode()
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ *  These decoding functions work the same as
+ *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+/*^**********************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***********************************************/
+/*-************************************
+ *  Private definitions
+ **************************************
+ * Do not use these definitions.
+ * They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Using these definitions will expose code to API and/or ABI break in future versions of the library.
+ **************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#include <stdint.h>
+
+typedef struct {   /* compression stream state (C99 / C++ variant); embedded in union LZ4_stream_u as 'internal_donotuse' */
+    uint32_t hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 == (1 << LZ4_HASHLOG) entries */
+    uint32_t currentOffset;
+    uint32_t initCheck;
+    const uint8_t* dictionary;
+    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    uint32_t dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {   /* decompression tracking state (C99 / C++ variant); embedded in union LZ4_streamDecode_u as 'internal_donotuse' */
+    const uint8_t* externalDict;
+    size_t extDictSize;
+    const uint8_t* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+#else
+
+typedef struct {   /* fallback when <stdint.h> is not used: same fields as the C99 variant, spelled with built-in types */
+    unsigned int hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 == (1 << LZ4_HASHLOG) entries */
+    unsigned int currentOffset;
+    unsigned int initCheck;
+    const unsigned char* dictionary;
+    unsigned char* bufferStart;   /* obsolete, used for slideInputBuffer */
+    unsigned int dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {   /* fallback when <stdint.h> is not used: same fields as the C99 variant, spelled with built-in types */
+    const unsigned char* externalDict;
+    size_t extDictSize;
+    const unsigned char* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+#endif
 
+/*!
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ * init this structure before first use.
+ * note : only use in association with static linking !
+ *        this definition is not API/ABI safe,
+ *        it may change in a future version !
+ */
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+union LZ4_stream_u {
+    unsigned long long table[LZ4_STREAMSIZE_U64];   /* LZ4_STREAMSIZE bytes of opaque storage, so the state can be allocated statically */
+    LZ4_stream_t_internal internal_donotuse;   /* typed view of the same storage; not meant for application code */
+} ;  /* previously typedef'd to LZ4_stream_t */
+
+
+/*!
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ * note : only use in association with static linking !
+ *        this definition is not API/ABI safe,
+ *        and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+union LZ4_streamDecode_u {
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];   /* LZ4_STREAMDECODESIZE bytes of opaque storage, so the state can be allocated statically */
+    LZ4_streamDecode_t_internal internal_donotuse;   /* typed view of the same storage; not meant for application code */
+} ;   /* previously typedef'd to LZ4_streamDecode_t */
+
+
+/*-************************************
+*  Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+   Should deprecation warnings be a problem,
+   it is generally possible to disable them,
+   typically with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
+#else
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
+#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/* Obsolete compression functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress               (const char* source, char* dest, int sourceSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast() instead") int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe() instead") int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+#endif /* LZ4_H_2983827168210 */
 
 
 #if defined (__cplusplus)
diff --git a/lz4/lz4.mk b/lz4/lz4.mk
index 8aa138e..41bd169 100644
--- a/lz4/lz4.mk
+++ b/lz4/lz4.mk
@@ -1,7 +1,8 @@
 LIBLZ4_SOURCES := \
 	xxhash.c \
 	lz4.c \
-	lz4hc.c \
+	lz4frame.c \
+	lz4hc.c
 
 $(eval $(call library,lz4,$(LIBLZ4_SOURCES),))
-$(eval $(call program,lz4cli,lz4,lz4cli.c))
+$(eval $(call include_sub_makes,lz4cli))
diff --git a/lz4/lz4cli.c b/lz4/lz4cli.c
deleted file mode 100644
index 335310b..0000000
--- a/lz4/lz4cli.c
+++ /dev/null
@@ -1,1266 +0,0 @@
-/*
-  LZ4cli.c - LZ4 Command Line Interface
-  Copyright (C) Yann Collet 2011-2013
-  GPL v2 License
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License along
-  with this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-  You can contact the author at :
-  - LZ4 source repository : http://code.google.com/p/lz4/
-  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-/*
-  Note : this is stand-alone program.
-  It is not part of LZ4 compression library, it is a user program of the LZ4 library.
-  The license of LZ4 library is BSD.
-  The license of xxHash library is BSD.
-  The license of this compression CLI program is GPLv2.
-*/
-
-//**************************************
-// Tuning parameters
-//**************************************
-// DISABLE_LZ4C_LEGACY_OPTIONS :
-// Control the availability of -c0, -c1 and -hc legacy arguments
-// Default : Legacy options are enabled
-// #define DISABLE_LZ4C_LEGACY_OPTIONS
-
-
-//**************************************
-// Compiler Options
-//**************************************
-// Disable some Visual warning messages
-#ifdef _MSC_VER  // Visual Studio
-#  define _CRT_SECURE_NO_WARNINGS
-#  define _CRT_SECURE_NO_DEPRECATE     // VS2005
-#  pragma warning(disable : 4127)      // disable: C4127: conditional expression is constant
-#endif
-
-#define _FILE_OFFSET_BITS 64   // Large file support on 32-bits unix
-#define _POSIX_SOURCE 1        // for fileno() within <stdio.h> on unix
-
-
-//****************************
-// Includes
-//****************************
-#include <stdio.h>    // fprintf, fopen, fread, _fileno, stdin, stdout
-#include <stdlib.h>   // malloc
-#include <string.h>   // strcmp, strlen
-#include <time.h>     // clock
-#include "lz4.h"
-#include "lz4hc.h"
-#include "xxhash.h"
-
-//****************************
-// Bench
-//****************************
-
-// #include "bench.h"
-#define BMK_SetBlocksize(a) ((void)a)
-#define BMK_SetNbIterations(a) ((void)a)
-#define BMK_SetPause()
-#define BMK_benchFile(a,b,c) 0
-
-
-//****************************
-// OS-specific Includes
-//****************************
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
-#  include <fcntl.h>    // _O_BINARY
-#  include <io.h>       // _setmode, _isatty
-#  ifdef __MINGW32__
-   int _fileno(FILE *stream);   // MINGW somehow forgets to include this windows declaration into <stdio.h>
-#  endif
-#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
-#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
-#else
-#  include <unistd.h>   // isatty
-#  define SET_BINARY_MODE(file)
-#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
-#endif
-
-
-//**************************************
-// Compiler-specific functions
-//**************************************
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-#if defined(_MSC_VER)    // Visual Studio
-#  define swap32 _byteswap_ulong
-#elif GCC_VERSION >= 403
-#  define swap32 __builtin_bswap32
-#else
-  static inline unsigned int swap32(unsigned int x)
-  {
-    return ((x << 24) & 0xff000000 ) |
-           ((x <<  8) & 0x00ff0000 ) |
-           ((x >>  8) & 0x0000ff00 ) |
-           ((x >> 24) & 0x000000ff );
-  }
-#endif
-
-
-//****************************
-// Constants
-//****************************
-#define COMPRESSOR_NAME "LZ4 Compression CLI"
-#ifndef LZ4_VERSION
-#  define LZ4_VERSION "v1.1.0"
-#endif
-#define AUTHOR "Yann Collet"
-#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_VERSION, AUTHOR, __DATE__
-#define LZ4_EXTENSION ".lz4"
-
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
-
-#define _1BIT  0x01
-#define _2BITS 0x03
-#define _3BITS 0x07
-#define _4BITS 0x0F
-#define _8BITS 0xFF
-
-#define MAGICNUMBER_SIZE   4
-#define LZ4S_MAGICNUMBER   0x184D2204
-#define LZ4S_SKIPPABLE0    0x184D2A50
-#define LZ4S_SKIPPABLEMASK 0xFFFFFFF0
-#define LEGACY_MAGICNUMBER 0x184C2102
-
-#define CACHELINE 64
-#define LEGACY_BLOCKSIZE   (8 MB)
-#define MIN_STREAM_BUFSIZE (1 MB + 64 KB)
-#define LZ4S_BLOCKSIZEID_DEFAULT 7
-#define LZ4S_CHECKSUM_SEED 0
-#define LZ4S_EOS 0
-#define LZ4S_MAXHEADERSIZE (MAGICNUMBER_SIZE+2+8+4+1)
-
-
-//**************************************
-// Architecture Macros
-//**************************************
-static const int one = 1;
-#define CPU_LITTLE_ENDIAN   (*(char*)(&one))
-#define CPU_BIG_ENDIAN      (!CPU_LITTLE_ENDIAN)
-#define LITTLE_ENDIAN_32(i) (CPU_LITTLE_ENDIAN?(i):swap32(i))
-
-
-//**************************************
-// Macros
-//**************************************
-#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-
-//**************************************
-// Special input/output
-//**************************************
-#define NULL_OUTPUT "null"
-char stdinmark[] = "stdin";
-char stdoutmark[] = "stdout";
-#ifdef _WIN32
-char nulmark[] = "nul";
-#else
-char nulmark[] = "/dev/null";
-#endif
-
-
-//**************************************
-// Local Parameters
-//**************************************
-static char* programName;
-static int displayLevel = 2;   // 0 : no display  // 1: errors  // 2 : + result + interaction + warnings ;  // 3 : + progression;  // 4 : + information
-static int overwrite = 0;
-static int blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT;
-static int blockChecksum = 0;
-static int streamChecksum = 1;
-static int blockIndependence = 1;
-
-
-//**************************************
-// Exceptions
-//**************************************
-#define DEBUG 0
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...)                                             \
-{                                                                         \
-    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
-    DISPLAYLEVEL(1, "Error %i : ", error);                                \
-    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
-    DISPLAYLEVEL(1, "\n");                                                \
-    exit(error);                                                          \
-}
-
-
-//**************************************
-// Version modifiers
-//**************************************
-#define EXTENDED_ARGUMENTS
-#define EXTENDED_HELP
-#define EXTENDED_FORMAT
-#define DEFAULT_COMPRESSOR   compress_file
-#define DEFAULT_DECOMPRESSOR decodeLZ4S
-
-
-//****************************
-// Functions
-//****************************
-int usage()
-{
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [arg] [input] [output]\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "input   : a filename\n");
-    DISPLAY( "          with no FILE, or when FILE is - or %s, read standard input\n", stdinmark);
-    DISPLAY( "Arguments :\n");
-    DISPLAY( " -1     : Fast compression (default) \n");
-    DISPLAY( " -9     : High compression \n");
-    DISPLAY( " -d     : decompression (default for %s extension)\n", LZ4_EXTENSION);
-    DISPLAY( " -z     : force compression\n");
-    DISPLAY( " -f     : overwrite output without prompting \n");
-    DISPLAY( " -h/-H  : display help/long help and exit\n");
-    return 0;
-}
-
-int usage_advanced()
-{
-    DISPLAY(WELCOME_MESSAGE);
-    usage();
-    DISPLAY( "\n");
-    DISPLAY( "Advanced arguments :\n");
-    DISPLAY( " -V     : display Version number and exit\n");
-    DISPLAY( " -v     : verbose mode\n");
-    DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
-    DISPLAY( " -c     : force write to standard output, even if it is the console\n");
-    DISPLAY( " -t     : test compressed file integrity\n");
-    DISPLAY( " -l     : compress using Legacy format (Linux kernel compression)\n");
-    DISPLAY( " -B#    : Block size [4-7](default : 7)\n");
-    DISPLAY( " -BD    : Block dependency (improve compression ratio)\n");
-    DISPLAY( " -BX    : enable block checksum (default:disabled)\n");
-    DISPLAY( " -Sx    : disable stream checksum (default:enabled)\n");
-    DISPLAY( "Benchmark arguments :\n");
-    DISPLAY( " -b     : benchmark file(s)\n");
-    DISPLAY( " -i#    : iteration loops [1-9](default : 3), benchmark mode only\n");
-#if !defined(DISABLE_LZ4C_LEGACY_OPTIONS)
-    DISPLAY( "Legacy arguments :\n");
-    DISPLAY( " -c0    : fast compression\n");
-    DISPLAY( " -c1    : high compression\n");
-    DISPLAY( " -hc    : high compression\n");
-    DISPLAY( " -y     : overwrite output without prompting \n");
-    DISPLAY( " -s     : suppress warnings \n");
-#endif // DISABLE_LZ4C_LEGACY_OPTIONS
-    EXTENDED_HELP;
-    return 0;
-}
-
-int usage_longhelp()
-{
-    DISPLAY( "\n");
-    DISPLAY( "Which values can get [output] ? \n");
-    DISPLAY( "[output] : a filename\n");
-    DISPLAY( "          '%s', or '-' for standard output (pipe mode)\n", stdoutmark);
-    DISPLAY( "          '%s' to discard output (test mode)\n", NULL_OUTPUT);
-    DISPLAY( "[output] can be left empty. In this case, it receives the following value : \n");
-    DISPLAY( "          - if stdout is not the console, then [output] = stdout \n");
-    DISPLAY( "          - if stdout is console : \n");
-    DISPLAY( "               + if compression selected, output to filename%s \n", LZ4_EXTENSION);
-    DISPLAY( "               + if decompression selected, output to filename without '%s'\n", LZ4_EXTENSION);
-    DISPLAY( "                    > if input filename has no '%s' extension : error\n", LZ4_EXTENSION);
-    DISPLAY( "\n");
-    DISPLAY( "Compression levels : \n");
-    DISPLAY( "There are technically 2 accessible compression levels.\n");
-    DISPLAY( "-0 ... -2 => Fast compression\n");
-    DISPLAY( "-3 ... -9 => High compression\n");
-    DISPLAY( "\n");
-    DISPLAY( "stdin, stdout and the console : \n");
-    DISPLAY( "To protect the console from binary flooding (bad argument mistake)\n");
-    DISPLAY( "%s will refuse to read from console, or write to console \n", programName);
-    DISPLAY( "except if '-c' command is specified, to force output to console \n");
-    DISPLAY( "\n");
-    DISPLAY( "Simple example :\n");
-    DISPLAY( "1 : compress 'filename' fast, using default output name 'filename.lz4'\n");
-    DISPLAY( "          %s filename\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "Arguments can be appended together, or provided independently. For example :\n");
-    DISPLAY( "2 : compress 'filename' in high compression mode, overwrite output if exists\n");
-    DISPLAY( "          %s -f9 filename \n", programName);
-    DISPLAY( "    is equivalent to :\n");
-    DISPLAY( "          %s -f -9 filename \n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "%s can be used in 'pure pipe mode', for example :\n", programName);
-    DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n");
-    DISPLAY( "          generator | %s | consumer \n", programName);
-#if !defined(DISABLE_LZ4C_LEGACY_OPTIONS)
-    DISPLAY( "\n");
-    DISPLAY( "Warning :\n");
-    DISPLAY( "Legacy arguments take precedence. Therefore : \n");
-    DISPLAY( "          %s -hc filename\n", programName);
-    DISPLAY( "means 'compress filename in high compression mode'\n");
-    DISPLAY( "It is not equivalent to :\n");
-    DISPLAY( "          %s -h -c filename\n", programName);
-    DISPLAY( "which would display help text and exit\n");
-#endif // DISABLE_LZ4C_LEGACY_OPTIONS
-    return 0;
-}
-
-int badusage()
-{
-    DISPLAYLEVEL(1, "Incorrect parameters\n");
-    if (displayLevel >= 1) usage();
-    exit(1);
-}
-
-
-static int          LZ4S_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
-static unsigned int LZ4S_GetCheckBits_FromXXH (unsigned int xxh) { return (xxh >> 8) & _8BITS; }
-static int          LZ4S_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4S_SKIPPABLEMASK) == LZ4S_SKIPPABLE0; }
-
-
-int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, FILE** pfoutput)
-{
-
-    if (!strcmp (input_filename, stdinmark))
-    {
-        DISPLAYLEVEL(4,"Using stdin for input\n");
-        *pfinput = stdin;
-        SET_BINARY_MODE(stdin);
-    }
-    else
-    {
-        *pfinput = fopen(input_filename, "rb");
-    }
-
-    if (!strcmp (output_filename, stdoutmark))
-    {
-        DISPLAYLEVEL(4,"Using stdout for output\n");
-        *pfoutput = stdout;
-        SET_BINARY_MODE(stdout);
-    }
-    else
-    {
-        // Check if destination file already exists
-        *pfoutput=0;
-        if (output_filename != nulmark) *pfoutput = fopen( output_filename, "rb" );
-        if (*pfoutput!=0)
-        {
-            fclose(*pfoutput);
-            if (!overwrite)
-            {
-                char ch;
-                DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename);
-                DISPLAYLEVEL(2, "Overwrite ? (Y/N) : ");
-                if (displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename);   // No interaction possible
-                ch = (char)getchar();
-                if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
-            }
-        }
-        *pfoutput = fopen( output_filename, "wb" );
-    }
-
-    if ( *pfinput==0 ) EXM_THROW(12, "Pb opening %s", input_filename);
-    if ( *pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename);
-
-    return 0;
-}
-
-
-
-int legacy_compress_file(char* input_filename, char* output_filename, int compressionlevel)
-{
-    int (*compressionFunction)(const char*, char*, int);
-    unsigned long long filesize = 0;
-    unsigned long long compressedfilesize = MAGICNUMBER_SIZE;
-    char* in_buff;
-    char* out_buff;
-    FILE* finput;
-    FILE* foutput;
-    int displayLevel = (compressionlevel>0);
-    clock_t start, end;
-    size_t sizeCheck;
-
-
-    // Init
-    if (compressionlevel < 3) compressionFunction = LZ4_compress; else compressionFunction = LZ4_compressHC;
-    start = clock();
-    get_fileHandle(input_filename, output_filename, &finput, &foutput);
-    if ((displayLevel==2) && (compressionlevel==1)) displayLevel=3;
-
-    // Allocate Memory
-    in_buff = (char*)malloc(LEGACY_BLOCKSIZE);
-    out_buff = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE));
-    if (!in_buff || !out_buff) EXM_THROW(21, "Allocation error : not enough memory");
-
-    // Write Archive Header
-    *(unsigned int*)out_buff = LITTLE_ENDIAN_32(LEGACY_MAGICNUMBER);
-    sizeCheck = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
-    if (sizeCheck!=MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header");
-
-    // Main Loop
-    while (1)
-    {
-        unsigned int outSize;
-        // Read Block
-        int inSize = (int) fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
-        if( inSize<=0 ) break;
-        filesize += inSize;
-        DISPLAYLEVEL(3, "\rRead : %i MB   ", (int)(filesize>>20));
-
-        // Compress Block
-        outSize = compressionFunction(in_buff, out_buff+4, inSize);
-        compressedfilesize += outSize+4;
-        DISPLAYLEVEL(3, "\rRead : %i MB  ==> %.2f%%   ", (int)(filesize>>20), (double)compressedfilesize/filesize*100);
-
-        // Write Block
-        * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize);
-        sizeCheck = fwrite(out_buff, 1, outSize+4, foutput);
-        if (sizeCheck!=(size_t)(outSize+4)) EXM_THROW(23, "Write error : cannot write compressed block");
-    }
-
-    // Status
-    end = clock();
-    DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
-    {
-        double seconds = (double)(end - start)/CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
-    }
-
-    // Close & Free
-    free(in_buff);
-    free(out_buff);
-    fclose(finput);
-    fclose(foutput);
-
-    return 0;
-}
-
-
-int compress_file_blockDependency(char* input_filename, char* output_filename, int compressionlevel)
-{
-    void* (*initFunction)       (const char*);
-    int   (*compressionFunction)(void*, const char*, char*, int, int);
-    char* (*translateFunction)  (void*);
-    int   (*freeFunction)       (void*);
-    void* ctx;
-    unsigned long long filesize = 0;
-    unsigned long long compressedfilesize = 0;
-    unsigned int checkbits;
-    char* in_buff, *in_start, *in_end;
-    char* out_buff;
-    FILE* finput;
-    FILE* foutput;
-    clock_t start, end;
-    unsigned int blockSize, inputBufferSize;
-    size_t sizeCheck, header_size;
-    void* streamChecksumState=NULL;
-
-
-    // Init
-    start = clock();
-    if ((displayLevel==2) && (compressionlevel>=3)) displayLevel=3;
-    if (compressionlevel>=3)
-    {
-        initFunction = LZ4_createHC;
-        compressionFunction = LZ4_compressHC_limitedOutput_continue;
-        translateFunction = LZ4_slideInputBufferHC;
-        freeFunction = LZ4_freeHC;
-    }
-    else
-    {
-        initFunction = LZ4_create;
-        compressionFunction = LZ4_compress_limitedOutput_continue;
-        translateFunction = LZ4_slideInputBuffer;
-        freeFunction = LZ4_free;
-    }
-    get_fileHandle(input_filename, output_filename, &finput, &foutput);
-    blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId);
-
-    // Allocate Memory
-    inputBufferSize = blockSize + 64 KB;
-    if (inputBufferSize < MIN_STREAM_BUFSIZE) inputBufferSize = MIN_STREAM_BUFSIZE;
-    in_buff  = (char*)malloc(inputBufferSize);
-    out_buff = (char*)malloc(blockSize+CACHELINE);
-    if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory");
-    in_start = in_buff; in_end = in_buff + inputBufferSize;
-    if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED);
-    ctx = initFunction(in_buff);
-
-    // Write Archive Header
-    *(unsigned int*)out_buff = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER);   // Magic Number, in Little Endian convention
-    *(out_buff+4)  = (1 & _2BITS) << 6 ;                             // Version('01')
-    *(out_buff+4) |= (blockIndependence & _1BIT) << 5;
-    *(out_buff+4) |= (blockChecksum & _1BIT) << 4;
-    *(out_buff+4) |= (streamChecksum & _1BIT) << 2;
-    *(out_buff+5)  = (char)((blockSizeId & _3BITS) << 4);
-    checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED);
-    checkbits = LZ4S_GetCheckBits_FromXXH(checkbits);
-    *(out_buff+6)  = (unsigned char) checkbits;
-    header_size = 7;
-    sizeCheck = fwrite(out_buff, 1, header_size, foutput);
-    if (sizeCheck!=header_size) EXM_THROW(32, "Write error : cannot write header");
-    compressedfilesize += header_size;
-
-    // Main Loop
-    while (1)
-    {
-        unsigned int outSize;
-        unsigned int inSize;
-        // Read Block
-        if ((in_start+blockSize) > in_end) in_start = translateFunction(ctx);
-        inSize = (unsigned int) fread(in_start, (size_t)1, (size_t)blockSize, finput);
-        if( inSize==0 ) break;   // No more input : end of compression
-        filesize += inSize;
-        DISPLAYLEVEL(3, "\rRead : %i MB   ", (int)(filesize>>20));
-        if (streamChecksum) XXH32_update(streamChecksumState, in_start, inSize);
-
-        // Compress Block
-        outSize = compressionFunction(ctx, in_start, out_buff+4, inSize, inSize-1);
-        if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += inSize+4;
-        if (blockChecksum) compressedfilesize+=4;
-        DISPLAYLEVEL(3, "\rRead : %i MB  ==> %.2f%%   ", (int)(filesize>>20), (double)compressedfilesize/filesize*100);
-
-        // Write Block
-        if (outSize > 0)
-        {
-            int sizeToWrite;
-            * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize);
-            if (blockChecksum)
-            {
-                unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
-                * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum);
-            }
-            sizeToWrite = 4 + outSize + (4*blockChecksum);
-            sizeCheck = fwrite(out_buff, 1, sizeToWrite, foutput);
-            if (sizeCheck!=(size_t)(sizeToWrite)) EXM_THROW(33, "Write error : cannot write compressed block");
-
-        }
-        else   // Copy Original
-        {
-            * (unsigned int*) out_buff = LITTLE_ENDIAN_32(inSize|0x80000000);   // Add Uncompressed flag
-            sizeCheck = fwrite(out_buff, 1, 4, foutput);
-            if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header");
-            sizeCheck = fwrite(in_start, 1, inSize, foutput);
-            if (sizeCheck!=(size_t)(inSize)) EXM_THROW(35, "Write error : cannot write block");
-            if (blockChecksum)
-            {
-                unsigned int checksum = XXH32(in_start, inSize, LZ4S_CHECKSUM_SEED);
-                * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
-                sizeCheck = fwrite(out_buff, 1, 4, foutput);
-                if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum");
-            }
-        }
-        in_start += inSize;
-    }
-
-    // End of Stream mark
-    * (unsigned int*) out_buff = LZ4S_EOS;
-    sizeCheck = fwrite(out_buff, 1, 4, foutput);
-    if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream");
-    compressedfilesize += 4;
-    if (streamChecksum)
-    {
-        unsigned int checksum = XXH32_digest(streamChecksumState);
-        * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
-        sizeCheck = fwrite(out_buff, 1, 4, foutput);
-        if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum");
-        compressedfilesize += 4;
-    }
-
-    // Status
-    end = clock();
-    DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
-    {
-        double seconds = (double)(end - start)/CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
-    }
-
-    // Close & Free
-    freeFunction(ctx);
-    free(in_buff);
-    free(out_buff);
-    fclose(finput);
-    fclose(foutput);
-
-    return 0;
-}
-
-
-int compress_file(char* input_filename, char* output_filename, int compressionlevel)
-{
-    int (*compressionFunction)(const char*, char*, int, int);
-    unsigned long long filesize = 0;
-    unsigned long long compressedfilesize = 0;
-    unsigned int checkbits;
-    char* in_buff;
-    char* out_buff;
-    char* headerBuffer;
-    FILE* finput;
-    FILE* foutput;
-    clock_t start, end;
-    int blockSize;
-    size_t sizeCheck, header_size, readSize;
-    void* streamChecksumState=NULL;
-
-    // Branch out
-    if (blockIndependence==0) return compress_file_blockDependency(input_filename, output_filename, compressionlevel);
-
-    // Init
-    start = clock();
-    if ((displayLevel==2) && (compressionlevel>=3)) displayLevel=3;
-    if (compressionlevel < 3) compressionFunction = LZ4_compress_limitedOutput; else compressionFunction = LZ4_compressHC_limitedOutput;
-    get_fileHandle(input_filename, output_filename, &finput, &foutput);
-    blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId);
-
-    // Allocate Memory
-    in_buff  = (char*)malloc(blockSize);
-    out_buff = (char*)malloc(blockSize+CACHELINE);
-    headerBuffer = (char*)malloc(LZ4S_MAXHEADERSIZE);
-    if (!in_buff || !out_buff || !(headerBuffer)) EXM_THROW(31, "Allocation error : not enough memory");
-    if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED);
-
-    // Write Archive Header
-    *(unsigned int*)headerBuffer = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER);   // Magic Number, in Little Endian convention
-    *(headerBuffer+4)  = (1 & _2BITS) << 6 ;                             // Version('01')
-    *(headerBuffer+4) |= (blockIndependence & _1BIT) << 5;
-    *(headerBuffer+4) |= (blockChecksum & _1BIT) << 4;
-    *(headerBuffer+4) |= (streamChecksum & _1BIT) << 2;
-    *(headerBuffer+5)  = (char)((blockSizeId & _3BITS) << 4);
-    checkbits = XXH32((headerBuffer+4), 2, LZ4S_CHECKSUM_SEED);
-    checkbits = LZ4S_GetCheckBits_FromXXH(checkbits);
-    *(headerBuffer+6)  = (unsigned char) checkbits;
-    header_size = 7;
-
-    // Write header
-    sizeCheck = fwrite(headerBuffer, 1, header_size, foutput);
-    if (sizeCheck!=header_size) EXM_THROW(32, "Write error : cannot write header");
-    compressedfilesize += header_size;
-
-    // read first block
-    readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput);
-
-    // Main Loop
-    while (readSize>0)
-    {
-        unsigned int outSize;
-
-        filesize += readSize;
-        DISPLAYLEVEL(3, "\rRead : %i MB   ", (int)(filesize>>20));
-        if (streamChecksum) XXH32_update(streamChecksumState, in_buff, (int)readSize);
-
-        // Compress Block
-        outSize = compressionFunction(in_buff, out_buff+4, (int)readSize, (int)readSize-1);
-        if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += readSize+4;
-        if (blockChecksum) compressedfilesize+=4;
-        DISPLAYLEVEL(3, "\rRead : %i MB  ==> %.2f%%   ", (int)(filesize>>20), (double)compressedfilesize/filesize*100);
-
-        // Write Block
-        if (outSize > 0)
-        {
-            int sizeToWrite;
-            * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize);
-            if (blockChecksum)
-            {
-                unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED);
-                * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum);
-            }
-            sizeToWrite = 4 + outSize + (4*blockChecksum);
-            sizeCheck = fwrite(out_buff, 1, sizeToWrite, foutput);
-            if (sizeCheck!=(size_t)(sizeToWrite)) EXM_THROW(33, "Write error : cannot write compressed block");
-        }
-        else  // Copy Original Uncompressed
-        {
-            * (unsigned int*) out_buff = LITTLE_ENDIAN_32(((unsigned long)readSize)|0x80000000);   // Add Uncompressed flag
-            sizeCheck = fwrite(out_buff, 1, 4, foutput);
-            if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header");
-            sizeCheck = fwrite(in_buff, 1, readSize, foutput);
-            if (sizeCheck!=readSize) EXM_THROW(35, "Write error : cannot write block");
-            if (blockChecksum)
-            {
-                unsigned int checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED);
-                * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
-                sizeCheck = fwrite(out_buff, 1, 4, foutput);
-                if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum");
-            }
-        }
-
-        // Read next block
-        readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput);
-    }
-
-    // End of Stream mark
-    * (unsigned int*) out_buff = LZ4S_EOS;
-    sizeCheck = fwrite(out_buff, 1, 4, foutput);
-    if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream");
-    compressedfilesize += 4;
-    if (streamChecksum)
-    {
-        unsigned int checksum = XXH32_digest(streamChecksumState);
-        * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
-        sizeCheck = fwrite(out_buff, 1, 4, foutput);
-        if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum");
-        compressedfilesize += 4;
-    }
-
-    // Close & Free
-    free(in_buff);
-    free(out_buff);
-    free(headerBuffer);
-    fclose(finput);
-    fclose(foutput);
-
-    // Final Status
-    end = clock();
-    DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
-    {
-        double seconds = (double)(end - start)/CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
-    }
-
-    return 0;
-}
-
-
-unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput)
-{
-    unsigned long long filesize = 0;
-    char* in_buff;
-    char* out_buff;
-    unsigned int blockSize;
-
-
-    // Allocate Memory
-    in_buff = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE));
-    out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
-    if (!in_buff || !out_buff) EXM_THROW(51, "Allocation error : not enough memory");
-
-    // Main Loop
-    while (1)
-    {
-        int decodeSize;
-        size_t sizeCheck;
-
-        // Block Size
-        sizeCheck = fread(&blockSize, 1, 4, finput);
-        if (sizeCheck==0) break;                   // Nothing to read : file read is completed
-        blockSize = LITTLE_ENDIAN_32(blockSize);   // Convert to Little Endian
-        if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE))
-        {   // Cannot read next block : maybe new stream ?
-            fseek(finput, -4, SEEK_CUR);
-            break;
-        }
-
-        // Read Block
-        sizeCheck = fread(in_buff, 1, blockSize, finput);
-
-        // Decode Block
-        decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
-        if (decodeSize < 0) EXM_THROW(52, "Decoding Failed ! Corrupted input detected !");
-        filesize += decodeSize;
-
-        // Write Block
-        sizeCheck = fwrite(out_buff, 1, decodeSize, foutput);
-        if (sizeCheck != (size_t)decodeSize) EXM_THROW(53, "Write error : cannot write decoded block into output\n");
-    }
-
-    // Free
-    free(in_buff);
-    free(out_buff);
-
-    return filesize;
-}
-
-
-unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
-{
-    unsigned long long filesize = 0;
-    char* in_buff;
-    char* out_buff, *out_start, *out_end;
-    unsigned char descriptor[LZ4S_MAXHEADERSIZE];
-    size_t nbReadBytes;
-    int decodedBytes=0;
-    unsigned int maxBlockSize;
-    size_t sizeCheck;
-    int blockChecksumFlag, streamChecksumFlag, blockIndependenceFlag;
-    void* streamChecksumState=NULL;
-    int (*decompressionFunction)(const char*, char*, int, int) = LZ4_decompress_safe;
-    unsigned int prefix64k = 0;
-
-    // Decode stream descriptor
-    nbReadBytes = fread(descriptor, 1, 3, finput);
-    if (nbReadBytes != 3) EXM_THROW(61, "Unreadable header");
-    {
-        int version       = (descriptor[0] >> 6) & _2BITS;
-        int streamSize    = (descriptor[0] >> 3) & _1BIT;
-        int reserved1     = (descriptor[0] >> 1) & _1BIT;
-        int dictionary    = (descriptor[0] >> 0) & _1BIT;
-
-        int reserved2     = (descriptor[1] >> 7) & _1BIT;
-        int blockSizeId   = (descriptor[1] >> 4) & _3BITS;
-        int reserved3     = (descriptor[1] >> 0) & _4BITS;
-        int checkBits     = (descriptor[2] >> 0) & _8BITS;
-        int checkBits_xxh32;
-
-        blockIndependenceFlag=(descriptor[0] >> 5) & _1BIT;
-        blockChecksumFlag = (descriptor[0] >> 4) & _1BIT;
-        streamChecksumFlag= (descriptor[0] >> 2) & _1BIT;
-
-        if (version != 1)       EXM_THROW(62, "Wrong version number");
-        if (streamSize == 1)    EXM_THROW(64, "Does not support stream size");
-        if (reserved1 != 0)     EXM_THROW(65, "Wrong value for reserved bits");
-        if (dictionary == 1)    EXM_THROW(66, "Does not support dictionary");
-        if (reserved2 != 0)     EXM_THROW(67, "Wrong value for reserved bits");
-        if (blockSizeId < 4)    EXM_THROW(68, "Unsupported block size");
-        if (reserved3 != 0)     EXM_THROW(67, "Wrong value for reserved bits");
-        maxBlockSize = LZ4S_GetBlockSize_FromBlockId(blockSizeId);
-        // Checkbits verification
-        descriptor[1] &= 0xF0;
-        checkBits_xxh32 = XXH32(descriptor, 2, LZ4S_CHECKSUM_SEED);
-        checkBits_xxh32 = LZ4S_GetCheckBits_FromXXH(checkBits_xxh32);
-        if (checkBits != checkBits_xxh32) EXM_THROW(69, "Stream descriptor error detected");
-    }
-
-    if (!blockIndependenceFlag)
-    {
-        decompressionFunction = LZ4_decompress_safe_withPrefix64k;
-        prefix64k = 64 KB;
-    }
-
-    // Allocate Memory
-    {
-        unsigned int outbuffSize = prefix64k+maxBlockSize;
-        in_buff  = (char*)malloc(maxBlockSize);
-        if (outbuffSize < MIN_STREAM_BUFSIZE) outbuffSize = MIN_STREAM_BUFSIZE;
-        out_buff = (char*)malloc(outbuffSize);
-        out_end = out_buff + outbuffSize;
-        out_start = out_buff + prefix64k;
-        if (!in_buff || !out_buff) EXM_THROW(70, "Allocation error : not enough memory");
-    }
-    if (streamChecksumFlag) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED);
-
-    // Main Loop
-    while (1)
-    {
-        unsigned int blockSize, uncompressedFlag;
-
-        // Block Size
-        nbReadBytes = fread(&blockSize, 1, 4, finput);
-        if( nbReadBytes != 4 ) EXM_THROW(71, "Read error : cannot read next block size");
-        if (blockSize == LZ4S_EOS) break;          // End of Stream Mark : stream is completed
-        blockSize = LITTLE_ENDIAN_32(blockSize);   // Convert to little endian
-        uncompressedFlag = blockSize >> 31;
-        blockSize &= 0x7FFFFFFF;
-        if (blockSize > maxBlockSize) EXM_THROW(72, "Error : invalid block size");
-
-        // Read Block
-        nbReadBytes = fread(in_buff, 1, blockSize, finput);
-        if( nbReadBytes != blockSize ) EXM_THROW(73, "Read error : cannot read data block" );
-
-        // Check Block
-        if (blockChecksumFlag)
-        {
-            unsigned int checksum = XXH32(in_buff, blockSize, LZ4S_CHECKSUM_SEED);
-            unsigned int readChecksum;
-            sizeCheck = fread(&readChecksum, 1, 4, finput);
-            if( sizeCheck != 4 ) EXM_THROW(74, "Read error : cannot read next block size");
-            readChecksum = LITTLE_ENDIAN_32(readChecksum);   // Convert to little endian
-            if (checksum != readChecksum) EXM_THROW(75, "Error : invalid block checksum detected");
-        }
-
-        if (uncompressedFlag)
-        {
-            // Write uncompressed Block
-            sizeCheck = fwrite(in_buff, 1, blockSize, foutput);
-            if (sizeCheck != (size_t)blockSize) EXM_THROW(76, "Write error : cannot write data block");
-            filesize += blockSize;
-            if (streamChecksumFlag) XXH32_update(streamChecksumState, in_buff, blockSize);
-            if (!blockIndependenceFlag)
-            {
-                if (blockSize >= prefix64k)
-                {
-                    memcpy(out_buff, in_buff + (blockSize - prefix64k), prefix64k);   // Required for reference for next blocks
-                    out_start = out_buff + prefix64k;
-                    continue;
-                }
-                else
-                {
-                    memcpy(out_start, in_buff, blockSize);
-                    decodedBytes = blockSize;
-                }
-            }
-        }
-        else
-        {
-            // Decode Block
-            decodedBytes = decompressionFunction(in_buff, out_start, blockSize, maxBlockSize);
-            if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! Corrupted input detected !");
-            filesize += decodedBytes;
-            if (streamChecksumFlag) XXH32_update(streamChecksumState, out_start, decodedBytes);
-
-            // Write Block
-            sizeCheck = fwrite(out_start, 1, decodedBytes, foutput);
-            if (sizeCheck != (size_t)decodedBytes) EXM_THROW(78, "Write error : cannot write decoded block\n");
-        }
-
-        if (!blockIndependenceFlag)
-        {
-            out_start += decodedBytes;
-            if ((size_t)(out_end - out_start) < (size_t)maxBlockSize)
-            {
-                memcpy(out_buff, out_start - prefix64k, prefix64k);
-                out_start = out_buff + prefix64k;
-            }
-        }
-    }
-
-    // Stream Checksum
-    if (streamChecksumFlag)
-    {
-        unsigned int checksum = XXH32_digest(streamChecksumState);
-        unsigned int readChecksum;
-        sizeCheck = fread(&readChecksum, 1, 4, finput);
-        if (sizeCheck != 4) EXM_THROW(74, "Read error : cannot read stream checksum");
-        readChecksum = LITTLE_ENDIAN_32(readChecksum);   // Convert to little endian
-        if (checksum != readChecksum) EXM_THROW(75, "Error : invalid stream checksum detected");
-    }
-
-    // Free
-    free(in_buff);
-    free(out_buff);
-
-    return filesize;
-}
-
-
-unsigned long long selectDecoder( FILE* finput,  FILE* foutput)
-{
-    unsigned int magicNumber, size;
-    int errorNb;
-    size_t nbReadBytes;
-
-    // Check Archive Header
-    nbReadBytes = fread(&magicNumber, 1, MAGICNUMBER_SIZE, finput);
-    if (nbReadBytes==0) return 0;                  // EOF
-    if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(41, "Unrecognized header : Magic Number unreadable");
-    magicNumber = LITTLE_ENDIAN_32(magicNumber);   // Convert to Little Endian format
-    if (LZ4S_isSkippableMagicNumber(magicNumber)) magicNumber = LZ4S_SKIPPABLE0;  // fold skippable magic numbers
-
-    switch(magicNumber)
-    {
-    case LZ4S_MAGICNUMBER:
-        return DEFAULT_DECOMPRESSOR(finput, foutput);
-    case LEGACY_MAGICNUMBER:
-        DISPLAYLEVEL(4, "Detected : Legacy format \n");
-        return decodeLegacyStream(finput, foutput);
-    case LZ4S_SKIPPABLE0:
-        DISPLAYLEVEL(4, "Skipping detected skippable area \n");
-        nbReadBytes = fread(&size, 1, 4, finput);
-        if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable");
-        size = LITTLE_ENDIAN_32(size);     // Convert to Little Endian format
-        errorNb = fseek(finput, size, SEEK_CUR);
-        if (errorNb != 0) EXM_THROW(43, "Stream error : cannot skip skippable area");
-        return selectDecoder(finput, foutput);
-    EXTENDED_FORMAT;
-    default:
-        if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(44,"Unrecognized header : file cannot be decoded");   // Wrong magic number at the beginning of 1st stream
-        DISPLAYLEVEL(2, "Stream followed by unrecognized data\n");
-        return 0;
-    }
-}
-
-
-int decodeFile(char* input_filename, char* output_filename)
-{
-    unsigned long long filesize = 0, decodedSize=0;
-    FILE* finput;
-    FILE* foutput;
-    clock_t start, end;
-
-
-    // Init
-    start = clock();
-    get_fileHandle(input_filename, output_filename, &finput, &foutput);
-
-    // Loop over multiple streams
-    do
-    {
-        decodedSize = selectDecoder(finput, foutput);
-        filesize += decodedSize;
-    } while (decodedSize);
-
-    // Final Status
-    end = clock();
-    DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2, "Successfully decoded %llu bytes                           \n", filesize);
-    {
-        double seconds = (double)(end - start)/CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
-    }
-
-    // Close
-    fclose(finput);
-    fclose(foutput);
-
-    // Error status = OK
-    return 0;
-}
-
-
-void waitEnter()
-{
-    DISPLAY("Press enter to continue...\n");
-    getchar();
-}
-
-
-int main(int argc, char** argv)
-{
-    int i,
-        cLevel=0,
-        decode=0,
-        bench=0,
-        legacy_format=0,
-        forceStdout=0,
-        forceCompress=0,
-        pause=0;
-    char* input_filename=0;
-    char* output_filename=0;
-    char* dynNameSpace=0;
-    char nullOutput[] = NULL_OUTPUT;
-    char extension[] = LZ4_EXTENSION;
-
-    // Init
-    programName = argv[0];
-
-    for(i=1; i<argc; i++)
-    {
-        char* argument = argv[i];
-
-        if(!argument) continue;   // Protection if argument empty
-
-        // Decode command (note : aggregated commands are allowed)
-        if (argument[0]=='-')
-        {
-            // '-' means stdin/stdout
-            if (argument[1]==0)
-            {
-                if (!input_filename) input_filename=stdinmark;
-                else output_filename=stdoutmark;
-            }
-
-            while (argument[1]!=0)
-            {
-                argument ++;
-
-#if !defined(DISABLE_LZ4C_LEGACY_OPTIONS)
-                // Legacy options (-c0, -c1, -hc, -y, -s)
-                if ((argument[0]=='c') && (argument[1]=='0')) { cLevel=0; argument++; continue; }          // -c0 (fast compression)
-                if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; }          // -c1 (high compression)
-                if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; }          // -hc (high compression)
-                if (*argument=='y') { overwrite=1; continue; }                                             // -y (answer 'yes' to overwrite permission)
-                if (*argument=='s') { displayLevel=1; continue; }                                          // -s (silent mode)
-#endif // DISABLE_LZ4C_LEGACY_OPTIONS
-
-                switch(argument[0])
-                {
-                    // Display help
-                case 'V': DISPLAY(WELCOME_MESSAGE); return 0;   // Version
-                case 'h': usage_advanced(); return 0;
-                case 'H': usage_advanced(); usage_longhelp(); return 0;
-
-                    // Compression (default)
-                case 'z': forceCompress = 1; break;
-
-                    // Compression level
-                case '0':
-                case '1':
-                case '2':
-                case '3':
-                case '4':
-                case '5':
-                case '6':
-                case '7':
-                case '8':
-                case '9': cLevel=*argument -'0'; break;
-
-                    // Use Legacy format (for Linux kernel compression)
-                case 'l': legacy_format=1; break;
-
-                    // Decoding
-                case 'd': decode=1; break;
-
-                    // Force stdout, even if stdout==console
-                case 'c': forceStdout=1; output_filename=stdoutmark; displayLevel=1; break;
-
-                    // Test
-                case 't': decode=1; output_filename=nulmark; break;
-
-                    // Overwrite
-                case 'f': overwrite=1; break;
-
-                    // Verbose mode
-                case 'v': displayLevel=4; break;
-
-                    // Quiet mode
-                case 'q': displayLevel--; break;
-
-                    // keep source file (default anyway, so useless) (for xz/lzma compatibility)
-                case 'k': break;
-
-                    // Modify Block Properties
-                case 'B':
-                    while (argument[1]!=0)
-                    {
-                        int exitBlockProperties=0;
-                        switch(argument[1])
-                        {
-                        case '4':
-                        case '5':
-                        case '6':
-                        case '7':
-                        {
-                            int B = argument[1] - '0';
-                            int S = 1 << (8 + 2*B);
-                            BMK_SetBlocksize(S);
-                            blockSizeId = B;
-                            argument++;
-                            break;
-                        }
-                        case 'D': blockIndependence = 0, argument++; break;
-                        case 'X': blockChecksum = 1, argument ++; break;
-                        default : exitBlockProperties=1;
-                        }
-                        if (exitBlockProperties) break;
-                    }
-                    break;
-
-                    // Modify Stream properties
-                case 'S': if (argument[1]=='x') { streamChecksum=0; argument++; break; } else { badusage(); }
-
-                    // Benchmark
-                case 'b': 
-                    abort(); bench=1; break;
-
-                    // Modify Nb Iterations (benchmark only)
-                case 'i':
-                    if ((argument[1] >='1') && (argument[1] <='9'))
-                    {
-                        int iters = argument[1] - '0';
-                        BMK_SetNbIterations(iters);
-                        argument++;
-                    }
-                    break;
-
-                    // Pause at the end (hidden option)
-                case 'p': pause=1; BMK_SetPause(); break;
-
-                EXTENDED_ARGUMENTS;
-
-                    // Unrecognised command
-                default : badusage();
-                }
-            }
-            continue;
-        }
-
-        // first provided filename is input
-        if (!input_filename) { input_filename=argument; continue; }
-
-        // second provided filename is output
-        if (!output_filename)
-        {
-            output_filename=argument;
-            if (!strcmp (output_filename, nullOutput)) output_filename = nulmark;
-            continue;
-        }
-    }
-
-    DISPLAYLEVEL(3, WELCOME_MESSAGE);
-    DISPLAYLEVEL(4, "Blocks size : %i KB\n", (1 << ((blockSizeId*2)-2)));
-
-    // No input filename ==> use stdin
-    if(!input_filename) { input_filename=stdinmark; }
-
-    // Check if input or output are defined as console; trigger an error in this case
-    if (!strcmp(input_filename, stdinmark)  && IS_CONSOLE(stdin)                 ) badusage();
-
-    // Check if benchmark is selected
-    if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, cLevel);
-
-    // No output filename ==> try to select one automatically (when possible)
-    while (!output_filename)
-    {
-        if (!IS_CONSOLE(stdout)) { output_filename=stdoutmark; break; }   // Default to stdout whenever possible (i.e. not a console)
-        if ((!decode) && !(forceCompress))   // auto-determine compression or decompression, based on file extension
-        {
-            size_t l = strlen(input_filename);
-            if (!strcmp(input_filename+(l-4), LZ4_EXTENSION)) decode=1;
-        }
-        if (!decode)   // compression to file
-        {
-            size_t l = strlen(input_filename);
-            dynNameSpace = (char*)calloc(1,l+5);
-            output_filename = dynNameSpace;
-            strcpy(output_filename, input_filename);
-            strcpy(output_filename+l, LZ4_EXTENSION);
-            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", output_filename);
-            break;
-        }
-        // decompression to file (automatic name will work only if input filename has correct format extension)
-        {
-            size_t outl;
-            size_t inl = strlen(input_filename);
-            dynNameSpace = (char*)calloc(1,inl+1);
-            output_filename = dynNameSpace;
-            strcpy(output_filename, input_filename);
-            outl = inl;
-            if (inl>4)
-                while ((outl >= inl-4) && (input_filename[outl] ==  extension[outl-inl+4])) output_filename[outl--]=0;
-            if (outl != inl-5) { DISPLAYLEVEL(1, "Cannot determine an output filename\n"); badusage(); }
-            DISPLAYLEVEL(2, "Decoding file %s \n", output_filename);
-        }
-    }
-
-    // No warning message in pure pipe mode (stdin + stdout)
-    if (!strcmp(input_filename, stdinmark) && !strcmp(output_filename,stdoutmark) && (displayLevel==2)) displayLevel=1;
-
-    // Check if input or output are defined as console; trigger an error in this case
-    if (!strcmp(input_filename, stdinmark)  && IS_CONSOLE(stdin)                 ) badusage();
-    if (!strcmp(output_filename,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) badusage();
-
-    // Decompress input if selected
-    if (decode) decodeFile(input_filename, output_filename);
-    else
-    // compression is default action
-    {
-        if (legacy_format)
-        {
-            DISPLAYLEVEL(3, "! Generating compressed LZ4 using Legacy format (deprecated !) ! \n");
-            legacy_compress_file(input_filename, output_filename, cLevel);
-        }
-        else
-        {
-            DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel);
-        }
-    }
-
-    if (pause) waitEnter();
-    free(dynNameSpace);
-    return 0;
-}
diff --git a/lz4/lz4cli/COPYING b/lz4/lz4cli/COPYING
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/lz4/lz4cli/COPYING
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/lz4/lz4cli/README.md b/lz4/lz4cli/README.md
new file mode 100644
index 0000000..2ad0449
--- /dev/null
+++ b/lz4/lz4cli/README.md
@@ -0,0 +1,71 @@
+Command Line Interface for LZ4 library
+============================================
+
+The Command Line Interface (CLI) can be built using the `make` command without any additional parameters.
+There are also multiple targets that create different variations of the CLI:
+- `lz4` : default CLI, with a command line syntax close to gzip
+- `lz4c` : Same as `lz4`, with additional support for legacy lz4 commands (incompatible with gzip)
+- `lz4c32` : Same as `lz4c`, but forced to compile in 32-bit mode
+
+
+#### Aggregation of parameters
+The CLI supports aggregation of parameters, i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
+
+
+
+#### Benchmark in Command Line Interface
+The CLI includes an in-memory compression benchmark module for lz4.
+The benchmark is conducted using a given filename.
+The file is first read into memory.
+This makes the benchmark more precise, as it eliminates I/O overhead.
+
+The benchmark measures ratio, compressed size, compression and decompression speed.
+One can select compression levels starting from `-b` and ending with `-e`.
+The `-i` parameter selects the minimum number of seconds spent on each of the tested levels.
+
+
+
+#### Usage of Command Line Interface
+The full list of commands can be obtained with `-h` or `-H` parameter:
+```
+Usage :
+      lz4 [arg] [input] [output]
+
+input   : a filename
+          with no FILE, or when FILE is - or stdin, read standard input
+Arguments :
+ -1     : Fast compression (default)
+ -9     : High compression
+ -d     : decompression (default for .lz4 extension)
+ -z     : force compression
+ -f     : overwrite output without prompting
+--rm    : remove source file(s) after successful de/compression
+ -h/-H  : display help/long help and exit
+
+Advanced arguments :
+ -V     : display Version number and exit
+ -v     : verbose mode
+ -q     : suppress warnings; specify twice to suppress errors too
+ -c     : force write to standard output, even if it is the console
+ -t     : test compressed file integrity
+ -m     : multiple input files (implies automatic output filenames)
+ -r     : operate recursively on directories (sets also -m)
+ -l     : compress using Legacy format (Linux kernel compression)
+ -B#    : Block size [4-7] (default : 7)
+ -BD    : Block dependency (improve compression ratio)
+--no-frame-crc : disable stream checksum (default:enabled)
+--content-size : compressed frame includes original size (default:not present)
+--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)
+Benchmark arguments :
+ -b#    : benchmark file(s), using # compression level (default : 1)
+ -e#    : test all compression levels from -bX to # (default : 1)
+ -i#    : minimum evaluation time in seconds (default : 3s)
+ -B#    : cut file into independent blocks of size # bytes [32+]
+                      or predefined block size [4-7] (default: 7)
+```
+
+#### License
+
+All files in this directory are licensed under GPL-v2.
+See [COPYING](COPYING) for details.
+The text of the license is also included at the top of each source file.
diff --git a/lz4/lz4cli/bench.c b/lz4/lz4cli/bench.c
new file mode 100644
index 0000000..77a9e3f
--- /dev/null
+++ b/lz4/lz4cli/bench.c
@@ -0,0 +1,521 @@
+/*
+    bench.c - Demo program to benchmark open-source compression algorithms
+    Copyright (C) Yann Collet 2012-2016
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+
+/*-************************************
+*  Compiler options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include "platform.h"    /* Compiler options */
+#include "util.h"        /* UTIL_GetFileSize, UTIL_sleep */
+#include <stdlib.h>      /* malloc, free */
+#include <string.h>      /* memset */
+#include <stdio.h>       /* fprintf, fopen, ftello */
+#include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
+
+#include "datagen.h"     /* RDG_genBuffer */
+#include "xxhash.h"
+
+
+#include "lz4.h"
+#define COMPRESSOR0 LZ4_compress_local   /* fast compressor : used for levels below LZ4HC_CLEVEL_MIN */
+static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); }   /* adapter : gives LZ4_compress_default() the 5-argument signature of LZ4_compress_HC(); clevel is ignored */
+#include "lz4hc.h"
+#define COMPRESSOR1 LZ4_compress_HC      /* high compression variant : used for all other levels */
+#define DEFAULTCOMPRESSOR COMPRESSOR0
+#define LZ4_isError(errcode) ((errcode)==0)   /* compression failure test : the compressors above return 0 when they fail */
+
+
+/* *************************************
+*  Constants
+***************************************/
+#ifndef LZ4_GIT_COMMIT   /* the build may provide a commit id through -DLZ4_GIT_COMMIT=... */
+#  define LZ4_GIT_COMMIT_STRING ""
+#else
+#  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
+#endif
+
+#define NBSECONDS             3   /* default value of g_nbSeconds */
+#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70*1000000ULL) /* 70 seconds */
+#define COOLPERIOD_SEC        10  /* pause length once ACTIVEPERIOD_MICROSEC has elapsed */
+#define DECOMP_MULT           2 /* test decompression DECOMP_MULT times longer than compression */
+
+#define KB *(1 <<10)   /* postfix multipliers : `64 MB` expands to `64 *(1 <<20)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));   /* upper bound for BMK_findMaxMem() : 2 GB - 64 MB when size_t is 4 bytes, 1<<33 bytes when it is 8 bytes */
+
+static U32 g_compressibilityDefault = 50;   /* percentage; divided by 100 to get the compressibility of synthetic data */
+
+
+/* *************************************
+*  console display
+***************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)   /* every message goes to stderr */
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=(l)) { DISPLAY(__VA_ARGS__); }   /* print only when verbosity is at least l */
+static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=(l)) { /* rate-limited variant of DISPLAYLEVEL */ \
+            if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
+            { g_time = clock(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stderr); } }   /* flush the stream DISPLAY() actually writes to */
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;   /* minimum clock() distance between two updates : 0.15 s */
+static clock_t g_time = 0;                                      /* clock() value at the last DISPLAYUPDATE output */
+
+
+/* *************************************
+*  Exceptions
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0   /* default : no source-location trace in error messages */
+#endif
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);   /* prints only when DEBUG is non-zero */
+#define EXM_THROW(error, ...)   /* report, then terminate with exit(error) */ \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/* *************************************
+*  Benchmark Parameters
+***************************************/
+static U32 g_nbSeconds = NBSECONDS;   /* target duration of the compression measurement, in seconds */
+static size_t g_blockSize = 0;        /* block size in bytes; BMK_benchMem() ignores values below 32 and does not cut files */
+int g_additionalParam = 0;            /* when non-zero, echoed as "param=" in the result line printed at display level 1 */
+
+void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }   /* sets verbosity; see g_displayLevel for the scale */
+
+void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }   /* stores the value in g_additionalParam */
+
+void BMK_SetNbSeconds(unsigned nbSeconds)   /* sets g_nbSeconds and announces it at display level 3+ */
+{
+    g_nbSeconds = nbSeconds;
+    DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
+}
+
+void BMK_SetBlockSize(size_t blockSize)   /* sets g_blockSize; no validation is done here */
+{
+    g_blockSize = blockSize;
+}
+
+
+/* ********************************************************
+*  Bench functions
+**********************************************************/
+typedef struct {          /* one entry per block processed by BMK_benchMem() */
+    const char* srcPtr;   /* start of the block inside the source buffer */
+    size_t srcSize;       /* uncompressed size of the block */
+    char*  cPtr;          /* where the compressed block is written */
+    size_t cRoom;         /* capacity reserved at cPtr : LZ4_compressBound(srcSize) */
+    size_t cSize;         /* compressed size returned by the compression function */
+    char*  resPtr;        /* where the block is regenerated by decompression */
+    size_t resSize;       /* size returned by the decompression function */
+} blockParam_t;
+
+struct compressionParameters   /* holds the compressor chosen for the current level */
+{
+    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);   /* set to COMPRESSOR0 or COMPRESSOR1 by BMK_benchMem() */
+};
+
+#define MIN(a,b) ((a)<(b) ? (a) : (b))   /* note : the selected argument is evaluated twice */
+#define MAX(a,b) ((a)>(b) ? (a) : (b))   /* note : the selected argument is evaluated twice */
+
+static int BMK_benchMem(const void* srcBuffer, size_t srcSize,     /* input : content of all files, back to back */
+                        const char* displayName, int cLevel,       /* label of the result lines; compression level */
+                        const size_t* fileSizes, U32 nbFiles)      /* size of each file inside srcBuffer */
+{   /* Times compression then decompression of srcBuffer cut into blocks, and checks the round trip with XXH64. Always returns 0 */
+    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
+    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
+    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
+    size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
+    void* const compressedBuffer = malloc(maxCompressedSize);
+    void* const resultBuffer = malloc(srcSize);
+    U32 nbBlocks;
+    UTIL_time_t ticksPerSecond;
+    struct compressionParameters compP;
+    int cfunctionId;
+
+    /* checks */
+    if (!compressedBuffer || !resultBuffer || !blockTable)
+        EXM_THROW(31, "allocation error : not enough memory");
+
+    /* init */
+    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
+    UTIL_initTimer(&ticksPerSecond);
+
+    /* select the compression function : fast LZ4 below LZ4HC_CLEVEL_MIN, LZ4 HC otherwise */
+    if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1;
+    switch (cfunctionId)
+    {
+#ifdef COMPRESSOR0
+    case 0 : compP.compressionFunction = COMPRESSOR0; break;
+#endif
+#ifdef COMPRESSOR1
+    case 1 : compP.compressionFunction = COMPRESSOR1; break;
+#endif
+    default : compP.compressionFunction = DEFAULTCOMPRESSOR;
+    }
+
+    /* Init blockTable data : each file is cut independently, a block never spans two files */
+    {   const char* srcPtr = (const char*)srcBuffer;
+        char* cPtr = (char*)compressedBuffer;
+        char* resPtr = (char*)resultBuffer;
+        U32 fileNb;
+        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
+            size_t remaining = fileSizes[fileNb];
+            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
+            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
+            for ( ; nbBlocks<blockEnd; nbBlocks++) {
+                size_t const thisBlockSize = MIN(remaining, blockSize);
+                blockTable[nbBlocks].srcPtr = srcPtr;
+                blockTable[nbBlocks].cPtr = cPtr;
+                blockTable[nbBlocks].resPtr = resPtr;
+                blockTable[nbBlocks].srcSize = thisBlockSize;
+                blockTable[nbBlocks].cRoom = LZ4_compressBound((int)thisBlockSize);
+                srcPtr += thisBlockSize;
+                cPtr += blockTable[nbBlocks].cRoom;
+                resPtr += thisBlockSize;
+                remaining -= thisBlockSize;
+    }   }   }
+
+    /* warming up memory */
+    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
+
+    /* Bench */
+    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
+        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
+        UTIL_time_t coolTime;
+        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100;
+        U64 totalCTime=0, totalDTime=0;
+        U32 cCompleted=0, dCompleted=0;
+#       define NB_MARKS 4
+        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
+        U32 markNb = 0;
+        size_t cSize = 0;
+        double ratio = 0.;
+
+        UTIL_getTime(&coolTime);
+        DISPLAYLEVEL(2, "\r%79s\r", "");
+        while (!cCompleted || !dCompleted) {
+            UTIL_time_t clockStart;
+            U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
+
+            /* overheat protection */
+            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
+                DISPLAYLEVEL(2, "\rcooling down ...    \r");
+                UTIL_sleep(COOLPERIOD_SEC);
+                UTIL_getTime(&coolTime);
+            }
+
+            /* Compression */
+            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
+            if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
+
+            UTIL_sleepMilli(1);  /* give processor time to other processes */
+            UTIL_waitForNextTick(ticksPerSecond);
+            UTIL_getTime(&clockStart);
+
+            if (!cCompleted) {   /* still some time to do compression tests */
+                U32 nbLoops = 0;
+                do {
+                    U32 blockNb;
+                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+                        size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
+                        if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed");
+                        blockTable[blockNb].cSize = rSize;
+                    }
+                    nbLoops++;
+                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
+                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                    if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;   /* keep the best per-pass time */
+                    totalCTime += clockSpan;
+                    cCompleted = totalCTime>maxTime;
+            }   }
+
+            cSize = 0;
+            { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
+            cSize += !cSize;  /* avoid div by 0 */
+            ratio = (double)srcSize / (double)cSize;
+            markNb = (markNb+1) % NB_MARKS;
+            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
+                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
+                    (double)srcSize / fastestC );
+
+            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
+#if 1
+            /* Decompression */
+            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
+
+            UTIL_sleepMilli(1); /* give processor time to other processes */
+            UTIL_waitForNextTick(ticksPerSecond);
+            UTIL_getTime(&clockStart);
+
+            if (!dCompleted) {
+                U32 nbLoops = 0;
+                do {
+                    U32 blockNb;
+                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+                        int const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
+                        if (regenSize < 0) {   /* LZ4_decompress_safe() reports errors as negative values : test before any conversion to size_t */
+                            DISPLAY("LZ4_decompress_safe() failed on block %u  \n", blockNb);
+                            clockLoop = 0;   /* force immediate test end */
+                            break;
+                        }
+
+                        blockTable[blockNb].resSize = (size_t)regenSize;
+                    }
+                    nbLoops++;
+                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop);
+                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
+                    if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;   /* keep the best per-pass time */
+                    totalDTime += clockSpan;
+                    dCompleted = totalDTime>(DECOMP_MULT*maxTime);
+            }   }
+
+            markNb = (markNb+1) % NB_MARKS;
+            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
+                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
+                    (double)srcSize / fastestC,
+                    (double)srcSize / fastestD );
+
+            /* CRC Checking */
+            {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
+                if (crcOrig!=crcCheck) {
+                    size_t u;
+                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
+                    for (u=0; u<srcSize; u++) {
+                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
+                            U32 segNb, bNb, pos;
+                            size_t bacc = 0;
+                            DISPLAY("Decoding error at pos %u ", (U32)u);
+                            for (segNb = 0; segNb < nbBlocks; segNb++) {
+                                if (bacc + blockTable[segNb].srcSize > u) break;
+                                bacc += blockTable[segNb].srcSize;
+                            }
+                            pos = (U32)(u - bacc);
+                            bNb = pos / (128 KB);
+                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
+                            break;
+                        }
+                        if (u==srcSize-1) {  /* should never happen */
+                            DISPLAY("no difference detected\n");
+                    }   }
+                    break;   /* round trip is wrong : stop measuring */
+            }   }   /* CRC Checking */
+#endif
+        }   /* while (!cCompleted || !dCompleted) */
+
+        if (g_displayLevel == 1) {
+            double cSpeed = (double)srcSize / fastestC;
+            double dSpeed = (double)srcSize / fastestD;
+            if (g_additionalParam)
+                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
+            else
+                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
+        }
+        DISPLAYLEVEL(2, "%2i#\n", cLevel);
+    }   /* Bench */
+
+    /* clean up */
+    free(blockTable);
+    free(compressedBuffer);
+    free(resultBuffer);
+    return 0;
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)   /* @return : a size (<= maxMemory) that malloc() could serve, minus a safety margin; 0 when nothing could be allocated */
+{
+    size_t const step = 64 MB;
+    BYTE* testmem = NULL;
+    /* move to the next 64 MB boundary, add headroom for the decrements below, cap to maxMemory */
+    requiredMem = (((requiredMem >> 26) + 1) << 26);
+    requiredMem += 2*step;
+    if (requiredMem > maxMemory) requiredMem = maxMemory;
+    /* probe : shrink the request until malloc() accepts it; stop once it reaches 0, as malloc(0) may keep returning NULL */
+    while (!testmem && requiredMem) {
+        if (requiredMem > step) requiredMem -= step;
+        else requiredMem >>= 1;
+        testmem = (BYTE*) malloc ((size_t)requiredMem);
+    }
+    free (testmem);
+
+    /* keep some space available */
+    if (requiredMem > step) requiredMem -= step;
+    else requiredMem >>= 1;
+
+    return (size_t)requiredMem;
+}
+
+
+static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
+                            const char* displayName, int cLevel, int cLevelLast,
+                            const size_t* fileSizes, unsigned nbFiles)
+{   /* Runs BMK_benchMem() once for every level in [cLevel, cLevelLast], labelled with the base name of displayName */
+    const char* lastSep = strrchr(displayName, '\\');   /* a Windows separator takes precedence */
+    int const finalLevel = (cLevelLast < cLevel) ? cLevel : cLevelLast;   /* inverted range : bench cLevel only */
+    int level;
+
+    if (lastSep == NULL) lastSep = strrchr(displayName, '/');   /* otherwise look for a Unix separator */
+    if (lastSep != NULL) displayName = lastSep + 1;             /* drop the directory part */
+
+    SET_REALTIME_PRIORITY;
+
+    if ((g_displayLevel == 1) && (g_additionalParam == 0)) {    /* header line of the terse output mode */
+        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
+    }
+
+    for (level = cLevel; level <= finalLevel; level++) {
+        BMK_benchMem(srcBuffer, benchedSize,
+                     displayName, level,
+                     fileSizes, nbFiles);
+    }
+}
+
+
+/*! BMK_loadFiles() :
+    Loads `buffer` with content of files listed within `fileNamesTable`, and stores each loaded size into `fileSizes`.
+    At most, fills `buffer` entirely : the last loaded file may be truncated, files after it get size 0. Exits on error */
+static void BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes,
+                          const char** fileNamesTable, unsigned nbFiles)
+{
+    size_t pos = 0, totalSize = 0;
+    unsigned n;
+    memset(fileSizes, 0, nbFiles * sizeof(*fileSizes));   /* entries never reached (early stop below) must not stay uninitialized : the caller reads all of them */
+    for (n=0; n<nbFiles; n++) {
+        FILE* f;
+        U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
+        if (UTIL_isDirectory(fileNamesTable[n])) {
+            DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
+            fileSizes[n] = 0;
+            continue;
+        }
+        f = fopen(fileNamesTable[n], "rb");
+        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
+        DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
+        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
+        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+          if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
+          pos += readSize; }
+        fileSizes[n] = (size_t)fileSize;
+        totalSize += (size_t)fileSize;
+        fclose(f);
+    }
+
+    if (totalSize == 0) EXM_THROW(12, "no data to bench");
+}
+
+static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
+                               int cLevel, int cLevelLast)
+{   /* Loads the listed files into one buffer (as much as memory and LZ4 allow), then benchmarks that buffer */
+    size_t* const sizeTable = (size_t*)malloc(nbFiles * sizeof(size_t));
+    U64 const wantedSize = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
+    char multiFileLabel[20] = {0};
+    const char* label;
+    size_t loadSize;
+    void* inputBuffer;
+
+    if (sizeTable == NULL) EXM_THROW(12, "not enough memory for fileSizes");
+
+    /* Decide how much to load : bounded by obtainable memory, total file size and LZ4's input limit */
+    loadSize = BMK_findMaxMem(wantedSize * 3) / 3;
+    if (loadSize == 0) EXM_THROW(12, "not enough memory");
+    if ((U64)loadSize > wantedSize) loadSize = (size_t)wantedSize;
+    if (loadSize > LZ4_MAX_INPUT_SIZE) {
+        loadSize = LZ4_MAX_INPUT_SIZE;
+        DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(loadSize >> 20));
+    } else if (loadSize < wantedSize) {
+        DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(loadSize >> 20));
+    }
+
+    /* Allocate the input buffer */
+    inputBuffer = malloc(loadSize ? loadSize : 1);   /* never request a zero-sized allocation */
+    if (inputBuffer == NULL) EXM_THROW(12, "not enough memory");
+
+    /* Fill it with file content */
+    BMK_loadFiles(inputBuffer, loadSize, sizeTable, fileNamesTable, nbFiles);
+
+    /* Benchmark : several files share one generic label, a single file is shown by name */
+    snprintf (multiFileLabel, sizeof(multiFileLabel), " %u files", nbFiles);
+    label = (nbFiles > 1) ? multiFileLabel : fileNamesTable[0];
+    BMK_benchCLevel(inputBuffer, loadSize,
+                    label, cLevel, cLevelLast,
+                    sizeTable, nbFiles);
+
+    /* Release resources */
+    free(inputBuffer);
+    free(sizeTable);
+}
+
+
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility)
+{   /* Benchmarks 10,000,000 bytes produced by RDG_genBuffer(), presented to the bench as a single file */
+    size_t sampleSize = 10000000;   /* not const : its address doubles as the one-entry file size table */
+    void* const sample = malloc(sampleSize);
+    char label[20] = {0};
+    unsigned const percent = (unsigned)(compressibility*100);
+
+    if (sample == NULL) EXM_THROW(21, "not enough memory");
+
+    /* Generate the input */
+    RDG_genBuffer(sample, sampleSize, compressibility, 0.0, 0);
+
+    /* Name it, then run every requested level */
+    snprintf (label, sizeof(label), "Synthetic %2u%%", percent);
+    BMK_benchCLevel(sample, sampleSize, label, cLevel, cLevelLast, &sampleSize, 1);
+
+    /* Release the buffer */
+    free(sample);
+}
+
+
+int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
+                   int cLevel, int cLevelLast)
+{   /* Entry point : caps both levels to LZ4HC_CLEVEL_MAX, orders them, then benchmarks the files, or synthetic data when nbFiles is 0. Returns 0 */
+    int const firstLevel = MIN(cLevel, LZ4HC_CLEVEL_MAX);
+    int const lastLevel  = MAX(firstLevel, MIN(cLevelLast, LZ4HC_CLEVEL_MAX));   /* never below firstLevel */
+
+    if (lastLevel > firstLevel) {
+        DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", firstLevel, lastLevel);
+    }
+    if (nbFiles != 0) {
+        BMK_benchFileTable(fileNamesTable, nbFiles, firstLevel, lastLevel);
+    } else {
+        BMK_syntheticTest(firstLevel, lastLevel, (double)g_compressibilityDefault / 100);
+    }
+    return 0;
+}
diff --git a/lz4/lz4cli/bench.h b/lz4/lz4cli/bench.h
new file mode 100644
index 0000000..15def93
--- /dev/null
+++ b/lz4/lz4cli/bench.h
@@ -0,0 +1,37 @@
+/*
+    bench.h - Demo program to benchmark open-source compression algorithm
+    Copyright (C) Yann Collet 2012-2016
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 source repository : https://github.com/lz4/lz4
+    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef BENCH_H_125623623633
+#define BENCH_H_125623623633
+
+#include <stddef.h>
+
+int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,   /* nbFiles==0 : benchmark generated data instead of files */
+                   int cLevel, int cLevelLast);                     /* levels cLevel..cLevelLast, each capped to LZ4HC_CLEVEL_MAX; returns 0 */
+
+/* Set Parameters */
+void BMK_SetNbSeconds(unsigned nbSeconds);          /* test duration, in seconds (a duration, not a loop count) */
+void BMK_SetBlockSize(size_t blockSize);            /* block size in bytes; values below 32 leave files uncut */
+void BMK_setAdditionalParam(int additionalParam);   /* value echoed as "param=" in terse result lines */
+void BMK_setNotificationLevel(unsigned level);      /* verbosity : 0 silent ... 4 most detailed */
+
+#endif   /* BENCH_H_125623623633 */
diff --git a/lz4/lz4cli/datagen.c b/lz4/lz4cli/datagen.c
new file mode 100644
index 0000000..a61afc0
--- /dev/null
+++ b/lz4/lz4cli/datagen.c
@@ -0,0 +1,189 @@
+/*
+    datagen.c - compressible data generator test tool
+    Copyright (C) Yann Collet 2012-2016
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Includes
+**************************************/
+#include "platform.h"  /* Compiler options, SET_BINARY_MODE */
+#include "util.h"      /* U32 */
+#include <stdlib.h>    /* malloc */
+#include <stdio.h>     /* FILE, fwrite */
+#include <string.h>    /* memcpy */
+
+
+/**************************************
+*  Constants
+**************************************/
+#define KB *(1 <<10)   /* postfix multiplier : "32 KB" expands to 32 *(1 <<10) */
+
+#define PRIME1   2654435761U   /* multiplier used by RDG_rand() */
+#define PRIME2   2246822519U   /* xor mask used by RDG_rand() */
+
+
+/**************************************
+*  Local types
+**************************************/
+#define LTLOG 13   /* log2 of the literal table size */
+#define LTSIZE (1<<LTLOG)   /* 8192 slots */
+#define LTMASK (LTSIZE-1)   /* reduces a random value to a valid slot index */
+typedef BYTE litDistribTable[LTSIZE];   /* one byte value per slot; filled by RDG_fillLiteralDistrib() */
+
+
+
+/*********************************************************
+*  Local Functions
+*********************************************************/
+#define MIN(a,b)   ( (a) < (b) ? (a) :(b) )   /* each argument may be evaluated twice : pass side-effect-free expressions */
+#define RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))   /* 32-bit left rotation; r must stay within 1..31 */
+static unsigned int RDG_rand(U32* src)   /* advances the generator state *src and returns the new state */
+{
+    U32 rand32 = *src;
+    rand32 *= PRIME1;
+    rand32 ^= PRIME2;
+    rand32  = RDG_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32;
+}
+
+
+static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)   /* fills all LTSIZE slots of lt; ld sets how long each run of identical bytes is */
+{
+    BYTE const firstChar = ld <= 0.0 ? 0 : '(';   /* ld <= 0.0 : cycle through byte values starting at 0 ... */
+    BYTE const lastChar  = ld <= 0.0 ? 255 : '}';   /* ... up to 255; otherwise stay within '(' .. '}' */
+    BYTE character = ld <= 0.0 ? 0 : '0';   /* value written into the first run */
+    U32 u = 0;   /* next slot to fill */
+
+    while (u<LTSIZE) {
+        U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1;   /* run length : fraction ld of the remaining slots, plus 1 */
+        U32 const end = MIN(u+weight, LTSIZE);   /* never run past the table */
+        while (u < end) lt[u++] = character;
+        character++;
+        if (character > lastChar) character = firstChar;   /* restart the cycle once the range is exhausted */
+    }
+}
+
+
+static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
+{
+    /* one random draw, reduced to a slot index, selects the byte to return */
+    return lt[RDG_rand(seed) & LTMASK];
+}
+
+
+#define RDG_DICTSIZE    (32 KB)   /* 32768 bytes */
+#define RDG_RAND15BITS  ((RDG_rand(seed) >> 3) & 32767)   /* one draw reduced to 0..32767; expects a local 'seed' pointer */
+#define RDG_RANDLENGTH  ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)   /* two draws : 0..15 when the first one selects non-zero bits, else 15..526 */
+void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr)
+{   /* fills buffer[prefixSize .. buffSize) ; buffer[0 .. prefixSize) is existing content that matches may copy from */
+    BYTE* buffPtr = (BYTE*)buffer;
+    const U32 matchProba32 = (U32)(32768 * matchProba);   /* same scale as RDG_RAND15BITS */
+    size_t pos = prefixSize;   /* next byte to write */
+    U32* seed = seedPtr;   /* name required by the RDG_RAND* macros */
+
+    /* special case : matchProba never changes here, so this loop only ends through the return below */
+    while (matchProba >= 1.0)
+    {
+        size_t size0 = RDG_rand(seed) & 3;   /* 0..3 */
+        size0  = (size_t)1 << (16 + size0 * 2);   /* 64 KB, 256 KB, 1 MB or 4 MB */
+        size0 += RDG_rand(seed) & (size0-1);   /* size0 is a power of 2, so size0-1 is a mask : adds a random amount below size0 */
+        if (buffSize < pos + size0)
+        {
+            memset(buffPtr+pos, 0, buffSize-pos);   /* the run does not fit : zero the remainder and stop */
+            return;
+        }
+        memset(buffPtr+pos, 0, size0);   /* run of zeroes ... */
+        pos += size0;
+        buffPtr[pos-1] = RDG_genChar(seed, lt);   /* ... whose last byte is replaced by one literal */
+    }
+
+    /* init : a match needs one earlier byte to copy; an empty buffer (buffSize == 0) must not be written to */
+    if (pos==0 && buffSize>0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
+
+    /* Generate compressible data */
+    while (pos < buffSize)
+    {
+        /* Select : Literal (char) or Match (within 32K) */
+        if (RDG_RAND15BITS < matchProba32)
+        {
+            /* Copy (within 32K) */
+            size_t match;
+            size_t d;
+            int length = RDG_RANDLENGTH + 4;   /* a match is at least 4 bytes long */
+            U32 offset = RDG_RAND15BITS + 1;   /* 1..32768 */
+            if (offset > pos) offset = (U32)pos;   /* cannot reach before the start of the buffer */
+            match = pos - offset;
+            d = pos + length;
+            if (d > buffSize) d = buffSize;   /* truncate at the end of the buffer */
+            while (pos < d) buffPtr[pos++] = buffPtr[match++];   /* byte by byte : source and destination may overlap */
+        }
+        else
+        {
+            /* Literal (noise) */
+            size_t d;
+            size_t length = RDG_RANDLENGTH;
+            d = pos + length;
+            if (d > buffSize) d = buffSize;   /* truncate at the end of the buffer */
+            while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt);
+        }
+    }
+}
+
+
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
+{
+    litDistribTable literals;   /* byte distribution table, rebuilt on every call */
+    double const effectiveLitProba = (litProba == 0.0) ? (matchProba / 4.5) : litProba;   /* 0.0 selects the default */
+    RDG_fillLiteralDistrib(literals, effectiveLitProba);
+    RDG_genBlock(buffer, size, 0, matchProba, literals, &seed);   /* no prefix : generation starts at offset 0 */
+}
+
+
+#define RDG_BLOCKSIZE (128 KB)   /* bytes generated per loop iteration */
+void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
+{
+    BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];   /* on the stack : dictionary area followed by one block */
+    U64 total = 0;   /* bytes written so far */
+    size_t genBlockSize = RDG_BLOCKSIZE;   /* bytes written per iteration; reduced for the final partial block */
+    litDistribTable lt;
+
+    /* init */
+    if (litProba==0.0) litProba = matchProba / 4.5;   /* 0.0 selects the default literal probability */
+    RDG_fillLiteralDistrib(lt, litProba);
+    SET_BINARY_MODE(stdout);
+
+    /* Generate dict : fills the first RDG_DICTSIZE bytes of buff */
+    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
+
+    /* Generate compressible data */
+    while (total < size)
+    {
+        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);   /* new data lands after the dictionary area */
+        if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
+        total += genBlockSize;
+        fwrite(buff, 1, genBlockSize, stdout);   /* written from the start of buff, dictionary area first; the return value is not checked */
+        /* update dict : the last RDG_DICTSIZE bytes of buff become the prefix of the next block */
+        memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
+    }
+}
diff --git a/lz4/lz4cli/datagen.h b/lz4/lz4cli/datagen.h
new file mode 100644
index 0000000..91c5b02
--- /dev/null
+++ b/lz4/lz4cli/datagen.h
@@ -0,0 +1,40 @@
+/*
+    datagen.h - compressible data generator header
+    Copyright (C) Yann Collet 2012-2016
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+#include <stddef.h>   /* size_t */
+
+void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
+/* RDG_genOut
+   Generates 'size' bytes of compressible data and writes them to stdout.
+   Compressibility is controlled by 'matchProba'.
+   'litProba' is optional and affects the variability of bytes. If litProba==0.0, matchProba / 4.5 is used.
+   The generated data is selected by 'seed'.
+   If matchProba, litProba and seed are all equal, the function always generates the same content.
+
+   RDG_genBuffer
+   Same as RDG_genOut, but generates the data into the provided buffer.
+*/
diff --git a/lz4/lz4cli/lz4.1 b/lz4/lz4cli/lz4.1
new file mode 100644
index 0000000..39d78cd
--- /dev/null
+++ b/lz4/lz4cli/lz4.1
@@ -0,0 +1,220 @@
+.
+.TH "LZ4" "1" "July 2017" "lz4 1.8.0" "User Commands"
+.
+.SH "NAME"
+\fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files
+.
+.SH "SYNOPSIS"
+\fBlz4\fR [\fIOPTIONS\fR] [\-|INPUT\-FILE] \fIOUTPUT\-FILE\fR
+.
+.P
+\fBunlz4\fR is equivalent to \fBlz4 \-d\fR
+.
+.P
+\fBlz4cat\fR is equivalent to \fBlz4 \-dcfm\fR
+.
+.P
+When writing scripts that need to decompress files, it is recommended to always use the name \fBlz4\fR with appropriate arguments (\fBlz4 \-d\fR or \fBlz4 \-dc\fR) instead of the names \fBunlz4\fR and \fBlz4cat\fR\.
+.
+.SH "DESCRIPTION"
+\fBlz4\fR is an extremely fast lossless compression algorithm, based on the \fBbyte\-aligned LZ77\fR family of compression schemes\. \fBlz4\fR offers compression speeds of 400 MB/s per core, linearly scalable with multi\-core CPUs\. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limit on multi\-core systems\. The native file format is the \fB\.lz4\fR format\.
+.
+.SS "Difference between lz4 and gzip"
+\fBlz4\fR supports a command line syntax similar \fIbut not identical\fR to \fBgzip(1)\fR\. Differences are :
+.
+.IP "\(bu" 4
+\fBlz4\fR preserves original files
+.
+.IP "\(bu" 4
+\fBlz4\fR compresses a single file by default (see \fB\-m\fR for multiple files)
+.
+.IP "\(bu" 4
+\fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2
+.
+.IP "\(bu" 4
+\fBlz4 file\.lz4\fR will default to decompression (use \fB\-z\fR to force compression)
+.
+.IP "\(bu" 4
+\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silence them)
+.
+.IP "\(bu" 4
+If no destination name is provided, result is sent to \fBstdout\fR \fIexcept if stdout is the console\fR\.
+.
+.IP "\(bu" 4
+If no destination name is provided, \fBand\fR if \fBstdout\fR is the console, \fBfile\fR is compressed into \fBfile\.lz4\fR\.
+.
+.IP "\(bu" 4
+As a consequence of previous rules, note the following example : \fBlz4 file | consumer\fR sends compressed data to \fBconsumer\fR through \fBstdout\fR, hence it does \fInot\fR create \fBfile\.lz4\fR\.
+.
+.IP "" 0
+.
+.P
+Default behaviors can be modified by opt\-in commands, detailed below\.
+.
+.IP "\(bu" 4
+\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications are also disabled by default (use \fB\-v\fR to enable them)\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main remaining difference being that source files are preserved by default\.
+.
+.IP "\(bu" 4
+Similarly, \fBlz4 \-m \-d\fR can decompress multiple \fB*\.lz4\fR files\.
+.
+.IP "\(bu" 4
+It\'s possible to opt\-in to erase source files on successful compression or decompression, using \fB\-\-rm\fR command\.
+.
+.IP "\(bu" 4
+Consequently, \fBlz4 \-m \-\-rm\fR behaves the same as \fBgzip\fR\.
+.
+.IP "" 0
+.
+.SS "Concatenation of \.lz4 files"
+It is possible to concatenate \fB\.lz4\fR files as is\. \fBlz4\fR will decompress such files as if they were a single \fB\.lz4\fR file\. For example:
+.
+.IP "" 4
+.
+.nf
+
+lz4 file1  > foo\.lz4
+lz4 file2 >> foo\.lz4
+.
+.fi
+.
+.IP "" 0
+.
+.P
+Then \fBlz4cat foo\.lz4\fR is equivalent to \fBcat file1 file2\fR\.
+.
+.SH "OPTIONS"
+.
+.SS "Short commands concatenation"
+In some cases, some options can be expressed using short command \fB\-x\fR or long command \fB\-\-long\-word\fR\. Short commands can be concatenated together\. For example, \fB\-d \-c\fR is equivalent to \fB\-dc\fR\. Long commands cannot be concatenated\. They must be clearly separated by a space\.
+.
+.SS "Multiple commands"
+When multiple contradictory commands are issued on a same command line, only the latest one will be applied\.
+.
+.SS "Operation mode"
+.
+.TP
+\fB\-z\fR \fB\-\-compress\fR
+Compress\. This is the default operation mode when no operation mode option is specified, no other operation mode is implied from the command name (for example, \fBunlz4\fR implies \fB\-\-decompress\fR), nor from the input file name (for example, a file extension \fB\.lz4\fR implies \fB\-\-decompress\fR by default)\. \fB\-z\fR can also be used to force compression of an already compressed \fB\.lz4\fR file\.
+.
+.TP
+\fB\-d\fR \fB\-\-decompress\fR \fB\-\-uncompress\fR
+Decompress\. \fB\-\-decompress\fR is also the default operation when the input filename has an \fB\.lz4\fR extension\.
+.
+.TP
+\fB\-t\fR \fB\-\-test\fR
+Test the integrity of compressed \fB\.lz4\fR files\. The decompressed data is discarded\. No files are created nor removed\.
+.
+.TP
+\fB\-b#\fR
+Benchmark mode, using \fB#\fR compression level\.
+.
+.SS "Operation modifiers"
+.
+.TP
+\fB\-#\fR
+Compression level, with # being any value from 1 to 16\. Higher values trade compression speed for compression ratio\. Values above 16 are considered the same as 16\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\.
+.
+.TP
+\fB\-f\fR \fB\-\-[no\-]force\fR
+This option has several effects:
+.
+.IP
+If the target file already exists, overwrite it without prompting\.
+.
+.IP
+When used with \fB\-\-decompress\fR and \fBlz4\fR cannot recognize the type of the source file, copy the source file as is to standard output\. This allows \fBlz4cat \-\-force\fR to be used like \fBcat (1)\fR for files that have not been compressed with \fBlz4\fR\.
+.
+.TP
+\fB\-c\fR \fB\-\-stdout\fR \fB\-\-to\-stdout\fR
+Force write to standard output, even if it is the console\.
+.
+.TP
+\fB\-m\fR \fB\-\-multiple\fR
+Multiple input files\. A \fB\.lz4\fR suffix is appended to compressed file names\. This mode also reduces notification level\. \fBlz4 \-m\fR has a behavior equivalent to \fBgzip \-k\fR (it preserves source files by default)\.
+.
+.TP
+\fB\-r\fR
+Operate recursively on directories\. This mode also sets \fB\-m\fR (multiple input files)\.
+.
+.TP
+\fB\-B#\fR
+Block size [4\-7](default : 7)
+.
+.br
+\fB\-B4\fR= 64KB ; \fB\-B5\fR= 256KB ; \fB\-B6\fR= 1MB ; \fB\-B7\fR= 4MB
+.
+.TP
+\fB\-BD\fR
+Block Dependency (improves compression ratio on small blocks)
+.
+.TP
+\fB\-\-[no\-]frame\-crc\fR
+Select frame checksum (default:enabled)
+.
+.TP
+\fB\-\-[no\-]content\-size\fR
+Header includes original size (default:not present)
+.
+.br
+Note : this option can only be activated when the original size can be determined, hence for a file\. It won\'t work with unknown source size, such as stdin or pipe\.
+.
+.TP
+\fB\-\-[no\-]sparse\fR
+Sparse mode support (default:enabled on file, disabled on stdout)
+.
+.TP
+\fB\-l\fR
+Use Legacy format (typically for Linux Kernel compression)
+.
+.br
+Note : \fB\-l\fR is not compatible with \fB\-m\fR (\fB\-\-multiple\fR) nor \fB\-r\fR
+.
+.SS "Other options"
+.
+.TP
+\fB\-v\fR \fB\-\-verbose\fR
+Verbose mode
+.
+.TP
+\fB\-q\fR \fB\-\-quiet\fR
+Suppress warnings and real\-time statistics; specify twice to suppress errors too
+.
+.TP
+\fB\-h\fR \fB\-H\fR \fB\-\-help\fR
+Display help/long help and exit
+.
+.TP
+\fB\-V\fR \fB\-\-version\fR
+Display Version number and exit
+.
+.TP
+\fB\-k\fR \fB\-\-keep\fR
+Preserve source files (default behavior)
+.
+.TP
+\fB\-\-rm\fR
+Delete source files on successful compression or decompression
+.
+.TP
+\fB\-\-\fR
+Treat all subsequent arguments as files
+.
+.SS "Benchmark mode"
+.
+.TP
+\fB\-b#\fR
+Benchmark file(s), using # compression level
+.
+.TP
+\fB\-e#\fR
+Benchmark multiple compression levels, from b# to e# (included)
+.
+.TP
+\fB\-i#\fR
+Minimum evaluation in seconds [1\-9] (default : 3)
+.
+.SH "BUGS"
+Report bugs at: https://github\.com/lz4/lz4/issues
+.
+.SH "AUTHOR"
+Yann Collet
diff --git a/lz4/lz4cli/lz4.1.md b/lz4/lz4cli/lz4.1.md
new file mode 100644
index 0000000..c6b99bc
--- /dev/null
+++ b/lz4/lz4cli/lz4.1.md
@@ -0,0 +1,218 @@
+lz4(1) -- lz4, unlz4, lz4cat - Compress or decompress .lz4 files
+================================================================
+
+SYNOPSIS
+--------
+
+`lz4` [*OPTIONS*] [-|INPUT-FILE] <OUTPUT-FILE>
+
+`unlz4` is equivalent to `lz4 -d`
+
+`lz4cat` is equivalent to `lz4 -dcfm`
+
+When writing scripts that need to decompress files,
+it is recommended to always use the name `lz4` with appropriate arguments
+(`lz4 -d` or `lz4 -dc`) instead of the names `unlz4` and `lz4cat`.
+
+
+DESCRIPTION
+-----------
+
+`lz4` is an extremely fast lossless compression algorithm,
+based on the **byte-aligned LZ77** family of compression schemes.
+`lz4` offers compression speeds of 400 MB/s per core, linearly scalable with
+multi-core CPUs.
+It features an extremely fast decoder, with speed in multiple GB/s per core,
+typically reaching RAM speed limit on multi-core systems.
+The native file format is the `.lz4` format.
+
+### Difference between lz4 and gzip
+
+`lz4` supports a command line syntax similar _but not identical_ to `gzip(1)`.
+Differences are :
+
+  * `lz4` preserves original files
+  * `lz4` compresses a single file by default (see `-m` for multiple files)
+  * `lz4 file1 file2` means : compress file1 _into_ file2
+  * `lz4 file.lz4` will default to decompression (use `-z` to force compression)
+  * `lz4` shows real-time notification statistics
+     during compression or decompression of a single file
+     (use `-q` to silence them)
+  * If no destination name is provided, result is sent to `stdout`
+    _except if stdout is the console_.
+  * If no destination name is provided, __and__ if `stdout` is the console,
+    `file` is compressed into `file.lz4`.
+  * As a consequence of previous rules, note the following example :
+    `lz4 file | consumer` sends compressed data to `consumer` through `stdout`,
+    hence it does _not_ create `file.lz4`.
+
+Default behaviors can be modified by opt-in commands, detailed below.
+
+  * `lz4 -m` makes it possible to provide multiple input filenames,
+    which will be compressed into files using suffix `.lz4`.
+    Progress notifications are also disabled by default (use `-v` to enable them).
+    This mode has a behavior which more closely mimics `gzip` command line,
+    with the main remaining difference being that source files are preserved by default.
+  * Similarly, `lz4 -m -d` can decompress multiple `*.lz4` files.
+  * It's possible to opt-in to erase source files
+    on successful compression or decompression, using `--rm` command.
+  * Consequently, `lz4 -m --rm` behaves the same as `gzip`.
+
+### Concatenation of .lz4 files
+
+It is possible to concatenate `.lz4` files as is.
+`lz4` will decompress such files as if they were a single `.lz4` file.
+For example:
+
+    lz4 file1  > foo.lz4
+    lz4 file2 >> foo.lz4
+
+Then `lz4cat foo.lz4` is equivalent to `cat file1 file2`.
+
+OPTIONS
+-------
+
+### Short commands concatenation
+
+In some cases, some options can be expressed using short command `-x`
+or long command `--long-word`.
+Short commands can be concatenated together.
+For example, `-d -c` is equivalent to `-dc`.
+Long commands cannot be concatenated.
+They must be clearly separated by a space.
+
+### Multiple commands
+
+When multiple contradictory commands are issued on a same command line,
+only the latest one will be applied.
+
+### Operation mode
+
+* `-z` `--compress`:
+  Compress.
+  This is the default operation mode when no operation mode option is
+  specified, no other operation mode is implied from the command name
+  (for example, `unlz4` implies `--decompress`),
+  nor from the input file name
+  (for example, a file extension `.lz4` implies  `--decompress` by default).
+  `-z` can also be used to force compression of an already compressed
+  `.lz4` file.
+
+* `-d` `--decompress` `--uncompress`:
+  Decompress.
+  `--decompress` is also the default operation when the input filename has an
+  `.lz4` extension.
+
+* `-t` `--test`:
+  Test the integrity of compressed `.lz4` files.
+  The decompressed data is discarded.
+  No files are created nor removed.
+
+* `-b#`:
+  Benchmark mode, using `#` compression level.
+
+### Operation modifiers
+
+* `-#`:
+  Compression level, with # being any value from 1 to 16.
+  Higher values trade compression speed for compression ratio.
+  Values above 16 are considered the same as 16.
+  Recommended values are 1 for fast compression (default),
+  and 9 for high compression.
+  Speed/compression trade-off will vary depending on data to compress.
+  Decompression speed remains fast at all settings.
+
+* `-f` `--[no-]force`:
+  This option has several effects:
+
+  If the target file already exists, overwrite it without prompting.
+
+  When used with `--decompress` and `lz4` cannot recognize the type of
+  the source file, copy the source file as is to standard output.
+  This allows `lz4cat --force` to be used like `cat (1)` for files
+  that have not been compressed with `lz4`.
+
+* `-c` `--stdout` `--to-stdout`:
+  Force write to standard output, even if it is the console.
+
+* `-m` `--multiple`:
+  Multiple input files.
+  A `.lz4` suffix is appended to compressed file names.
+  This mode also reduces notification level.
+  `lz4 -m` has a behavior equivalent to `gzip -k`
+  (it preserves source files by default).
+
+* `-r` :
+  Operate recursively on directories.
+  This mode also sets `-m` (multiple input files).
+
+* `-B#`:
+  Block size \[4-7\](default : 7)<br/>
+  `-B4`= 64KB ; `-B5`= 256KB ; `-B6`= 1MB ; `-B7`= 4MB
+
+* `-BD`:
+  Block Dependency (improves compression ratio on small blocks)
+
+* `--[no-]frame-crc`:
+  Select frame checksum (default:enabled)
+
+* `--[no-]content-size`:
+  Header includes original size (default:not present)<br/>
+  Note : this option can only be activated when the original size can be
+  determined, hence for a file. It won't work with unknown source size,
+  such as stdin or pipe.
+
+* `--[no-]sparse`:
+  Sparse mode support (default:enabled on file, disabled on stdout)
+
+* `-l`:
+  Use Legacy format (typically for Linux Kernel compression)<br/>
+  Note : `-l` is not compatible with `-m` (`--multiple`) nor `-r`
+
+### Other options
+
+* `-v` `--verbose`:
+  Verbose mode
+
+* `-q` `--quiet`:
+  Suppress warnings and real-time statistics;
+  specify twice to suppress errors too
+
+* `-h` `-H` `--help`:
+  Display help/long help and exit
+
+* `-V` `--version`:
+  Display Version number and exit
+
+* `-k` `--keep`:
+  Preserve source files (default behavior)
+
+* `--rm` :
+  Delete source files on successful compression or decompression
+
+* `--` :
+  Treat all subsequent arguments as files
+
+
+### Benchmark mode
+
+* `-b#`:
+  Benchmark file(s), using # compression level
+
+* `-e#`:
+  Benchmark multiple compression levels, from b# to e# (included)
+
+* `-i#`:
+  Minimum evaluation in seconds \[1-9\] (default : 3)
+
+
+BUGS
+----
+
+Report bugs at: https://github.com/lz4/lz4/issues
+
+
+AUTHOR
+------
+
+Yann Collet
diff --git a/lz4/lz4cli/lz4cli.c b/lz4/lz4cli/lz4cli.c
new file mode 100644
index 0000000..b4a3c14
--- /dev/null
+++ b/lz4/lz4cli/lz4cli.c
@@ -0,0 +1,647 @@
+/*
+  LZ4cli - LZ4 Command Line Interface
+  Copyright (C) Yann Collet 2011-2016
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - LZ4 source repository : https://github.com/lz4/lz4
+  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is stand-alone program.
+  It is not part of LZ4 compression library, it is a user program of the LZ4 library.
+  The license of LZ4 library is BSD.
+  The license of xxHash library is BSD.
+  The license of this compression CLI program is GPLv2.
+*/
+
+
+/****************************
+*  Includes
+*****************************/
+#include "platform.h" /* Compiler options, IS_CONSOLE */
+#include "util.h"     /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */
+#include <stdio.h>    /* fprintf, getchar */
+#include <stdlib.h>   /* exit, calloc, free */
+#include <string.h>   /* strcmp, strlen */
+#include "bench.h"    /* BMK_benchFiles, BMK_SetNbSeconds, BMK_SetBlockSize */
+#include "lz4io.h"    /* LZ4IO_compressFilename, LZ4IO_decompressFilename, LZ4IO_compressMultipleFilenames */
+#include "lz4hc.h"    /* LZ4HC_CLEVEL_MAX */
+#include "lz4.h"      /* LZ4_VERSION_STRING */
+
+
+/*****************************
+*  Constants
+******************************/
+#define COMPRESSOR_NAME "LZ4 command line interface"
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %i-bits v%s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_versionString(), AUTHOR   /* expands to a whole printf-style argument list : DISPLAY(WELCOME_MESSAGE) */
+#define LZ4_EXTENSION ".lz4"   /* suffix shown in the help text for compressed files */
+#define LZ4CAT "lz4cat"   /* NOTE(review): presumably an alternate program name checked at startup - confirm in main() */
+#define UNLZ4 "unlz4"   /* NOTE(review): same assumption as LZ4CAT */
+#define LZ4_LEGACY "lz4c"   /* NOTE(review): same assumption as LZ4CAT */
+static int g_lz4c_legacy_commands = 0;   /* non-zero : the help screens also list the legacy arguments */
+
+#define KB *(1U<<10)   /* postfix multipliers : "4 KB" expands to 4 *(1U<<10) */
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define LZ4_BLOCKSIZEID_DEFAULT 7   /* same value as the "-B#" default printed by the help text */
+
+
+/*-************************************
+*  Macros
+***************************************/
+#define DISPLAY(...)           fprintf(stderr, __VA_ARGS__)   /* every message goes to stderr */
+#define DISPLAYLEVEL(l, ...)   if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }   /* NOTE(review): expands to a bare "if" : an "else" written right after a call would bind to it */
+static unsigned displayLevel = 2;   /* 0 : no display ; 1: errors only ; 2 : downgradable normal ; 3 : non-downgradable normal; 4 : + information */
+
+
+/*-************************************
+*  Exceptions : EXM_THROW(code, fmt, ...) reports "Error <code> : <message>" at display level 1, then calls exit(<code>)
+***************************************/
+#define DEBUG 0   /* non-zero : EXM_THROW() also prints the file and line where it was raised */
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);   /* prints nothing while DEBUG is 0; already carries its own ';' */
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/*-************************************
+*  Version modifiers
+***************************************/
+#define EXTENDED_ARGUMENTS   /* defined empty : expands to nothing where it is used */
+#define EXTENDED_HELP   /* defined empty : "EXTENDED_HELP;" in usage_advanced() is an empty statement */
+#define EXTENDED_FORMAT   /* defined empty : expands to nothing where it is used */
+#define DEFAULT_COMPRESSOR   LZ4IO_compressFilename
+#define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename
+int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel);   /* hidden function; NOTE(review): presumably not declared by lz4io.h, hence this local prototype - confirm */
+
+
+/*-***************************
+*  Functions
+*****************************/
+static int usage(const char* exeName)   /* prints the short help to stderr; always returns 0 */
+{
+    DISPLAY( "Usage : \n");
+    DISPLAY( "      %s [arg] [input] [output] \n", exeName);
+    DISPLAY( "\n");
+    DISPLAY( "input   : a filename \n");
+    DISPLAY( "          with no FILE, or when FILE is - or %s, read standard input\n", stdinmark);
+    DISPLAY( "Arguments : \n");
+    DISPLAY( " -1     : Fast compression (default) \n");
+    DISPLAY( " -9     : High compression \n");
+    DISPLAY( " -d     : decompression (default for %s extension)\n", LZ4_EXTENSION);
+    DISPLAY( " -z     : force compression \n");
+    DISPLAY( " -f     : overwrite output without prompting \n");
+    DISPLAY( " -k     : preserve source file(s)  (default) \n");
+    DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
+    DISPLAY( " -h/-H  : display help/long help and exit \n");
+    return 0;
+}
+
+static int usage_advanced(const char* exeName)   /* prints the banner, the short help, then the advanced options; always returns 0 */
+{
+    DISPLAY(WELCOME_MESSAGE);   /* the macro supplies the format string and all of its arguments */
+    usage(exeName);
+    DISPLAY( "\n");
+    DISPLAY( "Advanced arguments :\n");
+    DISPLAY( " -V     : display Version number and exit \n");
+    DISPLAY( " -v     : verbose mode \n");
+    DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
+    DISPLAY( " -c     : force write to standard output, even if it is the console\n");
+    DISPLAY( " -t     : test compressed file integrity\n");
+    DISPLAY( " -m     : multiple input files (implies automatic output filenames)\n");
+#ifdef UTIL_HAS_CREATEFILELIST   /* -r is advertised only when this macro is defined */
+    DISPLAY( " -r     : operate recursively on directories (sets also -m) \n");
+#endif
+    DISPLAY( " -l     : compress using Legacy format (Linux kernel compression)\n");
+    DISPLAY( " -B#    : Block size [4-7] (default : 7) \n");
+    DISPLAY( " -BD    : Block dependency (improve compression ratio) \n");
+    DISPLAY( " -BX    : enable block checksum (default:disabled) \n");
+    DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n");
+    DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n");
+    DISPLAY( "--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)\n");
+    DISPLAY( "Benchmark arguments : \n");
+    DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
+    DISPLAY( " -e#    : test all compression levels from -bX to # (default : 1)\n");
+    DISPLAY( " -i#    : minimum evaluation time in seconds (default : 3s) \n");
+    DISPLAY( " -B#    : cut file into independent blocks of size # bytes [32+] \n");
+    DISPLAY( "                     or predefined block size [4-7] (default: 7) \n");
+    if (g_lz4c_legacy_commands) {   /* legacy arguments are listed only when this flag is set */
+        DISPLAY( "Legacy arguments : \n");
+        DISPLAY( " -c0    : fast compression \n");
+        DISPLAY( " -c1    : high compression \n");
+        DISPLAY( " -hc    : high compression \n");
+        DISPLAY( " -y     : overwrite output without prompting \n");
+    }
+    EXTENDED_HELP;   /* EXTENDED_HELP is defined empty in this file : this is an empty statement */
+    return 0;
+}
+
+static int usage_longhelp(const char* exeName)   /* prints the advanced help followed by explanations and examples; always returns 0 */
+{
+    usage_advanced(exeName);
+    DISPLAY( "\n");
+    DISPLAY( "****************************\n");
+    DISPLAY( "***** Advanced comment *****\n");
+    DISPLAY( "****************************\n");
+    DISPLAY( "\n");
+    DISPLAY( "Which values can [output] have ? \n");
+    DISPLAY( "---------------------------------\n");
+    DISPLAY( "[output] : a filename \n");
+    DISPLAY( "          '%s', or '-' for standard output (pipe mode)\n", stdoutmark);
+    DISPLAY( "          '%s' to discard output (test mode) \n", NULL_OUTPUT);
+    DISPLAY( "[output] can be left empty. In this case, it receives the following value :\n");
+    DISPLAY( "          - if stdout is not the console, then [output] = stdout \n");
+    DISPLAY( "          - if stdout is console : \n");
+    DISPLAY( "               + for compression, output to filename%s \n", LZ4_EXTENSION);
+    DISPLAY( "               + for decompression, output to filename without '%s'\n", LZ4_EXTENSION);
+    DISPLAY( "                    > if input filename has no '%s' extension : error \n", LZ4_EXTENSION);
+    DISPLAY( "\n");
+    DISPLAY( "Compression levels : \n");
+    DISPLAY( "---------------------\n");
+    DISPLAY( "-0 ... -2  => Fast compression, all identical\n");
+    DISPLAY( "-3 ... -%d => High compression; higher number == more compression but slower\n", LZ4HC_CLEVEL_MAX);
+    DISPLAY( "\n");
+    DISPLAY( "stdin, stdout and the console : \n");
+    DISPLAY( "--------------------------------\n");
+    DISPLAY( "To protect the console from binary flooding (bad argument mistake)\n");
+    DISPLAY( "%s will refuse to read from console, or write to console \n", exeName);
+    DISPLAY( "except if '-c' command is specified, to force output to console \n");
+    DISPLAY( "\n");
+    DISPLAY( "Simple example :\n");
+    DISPLAY( "----------------\n");
+    DISPLAY( "1 : compress 'filename' fast, using default output name 'filename.lz4'\n");
+    DISPLAY( "          %s filename\n", exeName);
+    DISPLAY( "\n");
+    DISPLAY( "Short arguments can be aggregated. For example :\n");
+    DISPLAY( "----------------------------------\n");
+    DISPLAY( "2 : compress 'filename' in high compression mode, overwrite output if exists\n");
+    DISPLAY( "          %s -9 -f filename \n", exeName);
+    DISPLAY( "    is equivalent to :\n");
+    DISPLAY( "          %s -9f filename \n", exeName);
+    DISPLAY( "\n");
+    DISPLAY( "%s can be used in 'pure pipe mode'. For example :\n", exeName);
+    DISPLAY( "-------------------------------------\n");
+    DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n");
+    DISPLAY( "          generator | %s | consumer \n", exeName);
+    if (g_lz4c_legacy_commands) {   /* the warning is shown only when the legacy arguments are enabled */
+        DISPLAY( "\n");
+        DISPLAY( "***** Warning  *****\n");
+        DISPLAY( "Legacy arguments take precedence. Therefore : \n");
+        DISPLAY( "---------------------------------\n");
+        DISPLAY( "          %s -hc filename\n", exeName);
+        DISPLAY( "means 'compress filename in high compression mode'\n");
+        DISPLAY( "It is not equivalent to :\n");
+        DISPLAY( "          %s -h -c filename\n", exeName);
+        DISPLAY( "which would display help text and exit\n");
+    }
+    return 0;
+}
+
+static int badusage(const char* exeName)
+{   /* complains, reminds the short usage unless output is silenced, then terminates with status 1 (never returns) */
+    DISPLAYLEVEL(1, "Incorrect parameters\n");
+    if (displayLevel > 0) { usage(exeName); }
+    exit(1);
+}
+
+
+static void waitEnter(void)
+{   /* holds the program until one character can be read from stdin (or stdin reports EOF) */
+    DISPLAY("Press enter to continue...\n");
+    { int const ignored = getchar(); (void)ignored; }
+}
+
+static const char* lastNameFromPath(const char* path)
+{   /* drops everything up to the last '/', then everything up to the last '\\' of what remains */
+    const char* const slash = strrchr(path, '/');
+    const char* const afterSlash = slash ? slash + 1 : path;
+    const char* const backslash = strrchr(afterSlash, '\\');   /* windows */
+    return backslash ? backslash + 1 : afterSlash;
+}
+
+/*! exeNameMatch() :
+    @return : 1 if exeName is exactly `test`, or `test` immediately followed by an extension ('.xxx');
+              0 otherwise */
+static int exeNameMatch(const char* exeName, const char* test)
+{
+    size_t const testLen = strlen(test);
+    return (strncmp(exeName, test, testLen) == 0) && ((exeName[testLen] == '.') || (exeName[testLen] == '\0'));
+}
+
+/*! readU32FromChar() :
+    Parses the run of decimal digits found at `*stringPtr`, optionally followed by
+    a K / KB / KiB (x1024) or M / MB / MiB (x1024x1024) suffix.
+    `*stringPtr` is advanced past every character consumed.
+    @return : the parsed value; note that it silently wraps around beyond UINT_MAX */
+static unsigned readU32FromChar(const char** stringPtr)
+{
+    const char* cursor = *stringPtr;
+    unsigned value = 0;
+    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) value = (value * 10) + (unsigned)(*cursor - '0');
+    if ((*cursor == 'K') || (*cursor == 'M')) {
+        value <<= (*cursor == 'M') ? 20 : 10;
+        cursor++;
+        if (*cursor == 'i') cursor++;
+        if (*cursor == 'B') cursor++;
+    }
+    *stringPtr = cursor;
+    return value;
+}
+
+typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench } operationMode_e;   /* operation selected in main(); om_auto : no explicit choice yet */
+
+int main(int argc, const char** argv)
+{   /* parses the command line, resolves input/output names, then runs bench, decompression or compression; @return : operationResult */
+    int i,
+        cLevel=1,
+        cLevelLast=1,
+        legacy_format=0,
+        forceStdout=0,
+        main_pause=0,
+        multiple_inputs=0,
+        all_arguments_are_files=0,
+        operationResult=0;
+    operationMode_e mode = om_auto;
+    const char* input_filename = NULL;
+    const char* output_filename= NULL;
+    char* dynNameSpace = NULL;
+    const char** inFileNames = (const char**) calloc(argc, sizeof(char*));
+    unsigned ifnIdx=0;
+    const char nullOutput[] = NULL_OUTPUT;
+    const char extension[] = LZ4_EXTENSION;
+    size_t blockSize = LZ4IO_setBlockSizeID(LZ4_BLOCKSIZEID_DEFAULT);
+    const char* const exeName = lastNameFromPath(argv[0]);
+#ifdef UTIL_HAS_CREATEFILELIST
+    const char** extendedFileList = NULL;
+    char* fileNamesBuf = NULL;
+    unsigned fileNamesNb, recursive=0;
+#endif
+
+    /* Init */
+    if (inFileNames==NULL) {
+        DISPLAY("Allocation error : not enough memory \n");
+        return 1;
+    }
+    inFileNames[0] = stdinmark;
+    LZ4IO_setOverwrite(0);
+
+    /* predefined behaviors, based on binary/link name */
+    if (exeNameMatch(exeName, LZ4CAT)) {
+        mode = om_decompress;
+        LZ4IO_setOverwrite(1);
+        LZ4IO_setRemoveSrcFile(0);
+        forceStdout=1;
+        output_filename=stdoutmark;
+        displayLevel=1;
+        multiple_inputs=1;
+    }
+    if (exeNameMatch(exeName, UNLZ4)) { mode = om_decompress; }
+    if (exeNameMatch(exeName, LZ4_LEGACY)) { g_lz4c_legacy_commands=1; }
+
+    /* command switches */
+    for(i=1; i<argc; i++) {
+        const char* argument = argv[i];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Short commands (note : aggregated short commands are allowed) */
+        if (!all_arguments_are_files && argument[0]=='-') {
+            /* '-' means stdin/stdout */
+            if (argument[1]==0) {
+                if (!input_filename) input_filename=stdinmark;
+                else output_filename=stdoutmark;
+                continue;
+            }
+
+            /* long commands (--long-word) */
+            if (argument[1]=='-') {
+                if (!strcmp(argument,  "--")) { all_arguments_are_files = 1; continue; }
+                if (!strcmp(argument,  "--compress")) { mode = om_compress; continue; }
+                if ((!strcmp(argument, "--decompress"))
+                    || (!strcmp(argument, "--uncompress"))) { mode = om_decompress; continue; }
+                if (!strcmp(argument,  "--multiple")) { multiple_inputs = 1; continue; }
+                if (!strcmp(argument,  "--test")) { mode = om_test; continue; }
+                if (!strcmp(argument,  "--force")) { LZ4IO_setOverwrite(1); continue; }
+                if (!strcmp(argument,  "--no-force")) { LZ4IO_setOverwrite(0); continue; }
+                if ((!strcmp(argument, "--stdout"))
+                    || (!strcmp(argument, "--to-stdout"))) { forceStdout=1; output_filename=stdoutmark; continue; }
+                if (!strcmp(argument,  "--frame-crc")) { LZ4IO_setStreamChecksumMode(1); continue; }
+                if (!strcmp(argument,  "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(0); continue; }
+                if (!strcmp(argument,  "--content-size")) { LZ4IO_setContentSize(1); continue; }
+                if (!strcmp(argument,  "--no-content-size")) { LZ4IO_setContentSize(0); continue; }
+                if (!strcmp(argument,  "--sparse")) { LZ4IO_setSparseFile(2); continue; }
+                if (!strcmp(argument,  "--no-sparse")) { LZ4IO_setSparseFile(0); continue; }
+                if (!strcmp(argument,  "--verbose")) { displayLevel++; continue; }
+                if (!strcmp(argument,  "--quiet")) { if (displayLevel) displayLevel--; continue; }
+                if (!strcmp(argument,  "--version")) { DISPLAY(WELCOME_MESSAGE); goto _cleanup; }   /* same exit path as -V : releases inFileNames */
+                if (!strcmp(argument,  "--help")) { usage_advanced(exeName); goto _cleanup; }
+                if (!strcmp(argument,  "--keep")) { LZ4IO_setRemoveSrcFile(0); continue; }   /* keep source file (default) */
+                if (!strcmp(argument,  "--rm")) { LZ4IO_setRemoveSrcFile(1); continue; }
+            }
+
+            while (argument[1]!=0) {
+                argument ++;
+
+                if (g_lz4c_legacy_commands) {
+                    /* Legacy commands (-c0, -c1, -hc, -y) */
+                    if ((argument[0]=='c') && (argument[1]=='0')) { cLevel=0; argument++; continue; }  /* -c0 (fast compression) */
+                    if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; }  /* -c1 (high compression) */
+                    if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; }  /* -hc (high compression) */
+                    if (argument[0]=='y') { LZ4IO_setOverwrite(1); continue; }                         /* -y (answer 'yes' to overwrite permission) */
+                }
+
+                if ((*argument>='0') && (*argument<='9')) {
+                    cLevel = readU32FromChar(&argument);
+                    argument--;
+                    continue;
+                }
+
+
+                switch(argument[0])
+                {
+                    /* Display help */
+                case 'V': DISPLAY(WELCOME_MESSAGE); goto _cleanup;   /* Version */
+                case 'h': usage_advanced(exeName); goto _cleanup;
+                case 'H': usage_longhelp(exeName); goto _cleanup;
+
+                case 'e':
+                    argument++;
+                    cLevelLast = readU32FromChar(&argument);
+                    argument--;
+                    break;
+
+                    /* Compression (default) */
+                case 'z': mode = om_compress; break;
+
+                    /* Use Legacy format (ex : Linux kernel compression) */
+                case 'l': legacy_format = 1; blockSize = 8 MB; break;
+
+                    /* Decoding */
+                case 'd': mode = om_decompress; break;
+
+                    /* Force stdout, even if stdout==console */
+                case 'c': forceStdout=1; output_filename=stdoutmark; break;
+
+                    /* Test integrity */
+                case 't': mode = om_test; break;
+
+                    /* Overwrite */
+                case 'f': LZ4IO_setOverwrite(1); break;
+
+                    /* Verbose mode */
+                case 'v': displayLevel++; break;
+
+                    /* Quiet mode */
+                case 'q': if (displayLevel) displayLevel--; break;
+
+                    /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */
+                case 'k': LZ4IO_setRemoveSrcFile(0); break;
+
+                    /* Modify Block Properties */
+                case 'B':
+                    while (argument[1]!=0) {
+                        int exitBlockProperties=0;
+                        switch(argument[1])
+                        {
+                        case 'D': LZ4IO_setBlockMode(LZ4IO_blockLinked); argument++; break;
+                        case 'X': LZ4IO_setBlockChecksumMode(1); argument ++; break;   /* disabled by default */
+                        default :
+                            if (argument[1] < '0' || argument[1] > '9') {
+                                exitBlockProperties=1;
+                                break;
+                            } else {
+                                unsigned B;
+                                argument++;
+                                B = readU32FromChar(&argument);
+                                argument--;
+                                if (B < 4) badusage(exeName);
+                                if (B <= 7) {
+                                    blockSize = LZ4IO_setBlockSizeID(B);
+                                    BMK_SetBlockSize(blockSize);
+                                    DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
+                                } else {
+                                    if (B < 32) badusage(exeName);
+                                    BMK_SetBlockSize(B);
+                                    if (B >= 1024) {
+                                        DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10));
+                                    } else {
+                                        DISPLAYLEVEL(2, "bench: using blocks of size %u bytes \n", (U32)(B));
+                                    }
+                                }
+                                break;
+                            }
+                        }
+                        if (exitBlockProperties) break;
+                    }
+                    break;
+
+                    /* Benchmark */
+                case 'b': mode = om_bench; multiple_inputs=1;
+                    break;
+
+#ifdef UTIL_HAS_CREATEFILELIST
+                    /* recursive */
+                case 'r': recursive=1;
+#endif
+                    /* fall-through */
+                    /* Treat non-option args as input files.  See https://code.google.com/p/lz4/issues/detail?id=151 */
+                case 'm': multiple_inputs=1;
+                    break;
+
+                    /* Modify Nb Seconds (benchmark only) */
+                case 'i':
+                    {   unsigned iters;
+                        argument++;
+                        iters = readU32FromChar(&argument);
+                        argument--;
+                        BMK_setNotificationLevel(displayLevel);
+                        BMK_SetNbSeconds(iters);   /* notification if displayLevel >= 3 */
+                    }
+                    break;
+
+                    /* Pause at the end (hidden option) */
+                case 'p': main_pause=1; break;
+
+                    /* Specific commands for customized versions */
+                EXTENDED_ARGUMENTS;
+
+                    /* Unrecognised command */
+                default : badusage(exeName);
+                }
+            }
+            continue;
+        }
+
+        /* Store in *inFileNames[] if -m is used. */
+        if (multiple_inputs) { inFileNames[ifnIdx++]=argument; continue; }
+
+        /* Store first non-option arg in input_filename to preserve original cli logic. */
+        if (!input_filename) { input_filename=argument; continue; }
+
+        /* Second non-option arg in output_filename to preserve original cli logic. */
+        if (!output_filename) {
+            output_filename=argument;
+            if (!strcmp (output_filename, nullOutput)) output_filename = nulmark;
+            continue;
+        }
+
+        /* 3rd non-option arg should not exist */
+        DISPLAYLEVEL(1, "Warning : %s won't be used ! Do you want multiple input files (-m) ? \n", argument);
+    }
+
+    DISPLAYLEVEL(3, WELCOME_MESSAGE);
+#ifdef _POSIX_C_SOURCE
+    DISPLAYLEVEL(4, "_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE);
+#endif
+#ifdef _POSIX_VERSION
+    DISPLAYLEVEL(4, "_POSIX_VERSION defined: %ldL\n", (long) _POSIX_VERSION);
+#endif
+#ifdef PLATFORM_POSIX_VERSION
+    DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
+#endif
+#ifdef _FILE_OFFSET_BITS
+    DISPLAYLEVEL(4, "_FILE_OFFSET_BITS defined: %ldL\n", (long) _FILE_OFFSET_BITS);
+#endif
+    if ((mode == om_compress) || (mode == om_bench))
+        DISPLAYLEVEL(4, "Blocks size : %u KB\n", (U32)(blockSize>>10));
+
+    if (multiple_inputs) {
+        input_filename = inFileNames[0];
+#ifdef UTIL_HAS_CREATEFILELIST
+        if (recursive) {  /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
+            extendedFileList = UTIL_createFileList(inFileNames, ifnIdx, &fileNamesBuf, &fileNamesNb);
+            if (extendedFileList) {
+                unsigned u;
+                for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
+                free((void*)inFileNames);
+                inFileNames = extendedFileList;
+                ifnIdx = fileNamesNb;
+            }
+        }
+#endif
+    }
+
+    /* benchmark and test modes */
+    if (mode == om_bench) {
+        BMK_setNotificationLevel(displayLevel);
+        operationResult = BMK_benchFiles(inFileNames, ifnIdx, cLevel, cLevelLast);
+        goto _cleanup;
+    }
+
+    if (mode == om_test) {
+        LZ4IO_setTestMode(1);
+        output_filename = nulmark;
+        mode = om_decompress;   /* defer to decompress */
+    }
+
+    /* compress or decompress */
+    if (!input_filename) input_filename = stdinmark;
+    /* Check if input is defined as console; trigger an error in this case */
+    if (!strcmp(input_filename, stdinmark) && IS_CONSOLE(stdin) ) {
+        DISPLAYLEVEL(1, "refusing to read from a console\n");
+        exit(1);
+    }
+    /* if input==stdin and no output defined, stdout becomes default output */
+    if (!strcmp(input_filename, stdinmark) && !output_filename)
+        output_filename = stdoutmark;
+
+    /* No output filename ==> try to select one automatically (when possible) */
+    while ((!output_filename) && (multiple_inputs==0)) {
+        if (!IS_CONSOLE(stdout)) { output_filename=stdoutmark; break; }   /* Default to stdout whenever possible (i.e. not a console) */
+        if (mode == om_auto) {  /* auto-determine compression or decompression, based on file extension */
+            size_t const inSize  = strlen(input_filename);
+            size_t const extSize = strlen(LZ4_EXTENSION);
+            size_t const extStart= (inSize > extSize) ? inSize-extSize : 0;
+            if (!strcmp(input_filename+extStart, LZ4_EXTENSION)) mode = om_decompress;
+            else mode = om_compress;
+        }
+        if (mode == om_compress) {   /* compression to file */
+            size_t const l = strlen(input_filename);
+            dynNameSpace = (char*)calloc(1,l+5);
+            if (dynNameSpace==NULL) { perror(exeName); exit(1); }
+            strcpy(dynNameSpace, input_filename);
+            strcat(dynNameSpace, LZ4_EXTENSION);
+            output_filename = dynNameSpace;
+            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", output_filename);
+            break;
+        }
+        if (mode == om_decompress) {/* decompression to file (automatic name will work only if input filename has correct format extension) */
+            size_t outl;
+            size_t const inl = strlen(input_filename);
+            dynNameSpace = (char*)calloc(1,inl+1);
+            if (dynNameSpace==NULL) { perror(exeName); exit(1); }
+            strcpy(dynNameSpace, input_filename);
+            outl = inl;
+            if (inl>4)
+                while ((outl >= inl-4) && (input_filename[outl] ==  extension[outl-inl+4])) dynNameSpace[outl--]=0;
+            if (outl != inl-5) { DISPLAYLEVEL(1, "Cannot determine an output filename\n"); badusage(exeName); }
+            output_filename = dynNameSpace;
+            DISPLAYLEVEL(2, "Decoding file %s \n", output_filename);
+        }
+        break;
+    }
+
+    /* Check if output is defined as console; trigger an error in this case */
+    if (!output_filename) output_filename = "*\\dummy^!//";
+    if (!strcmp(output_filename,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) {
+        DISPLAYLEVEL(1, "refusing to write to console without -c\n");
+        exit(1);
+    }
+    /* Downgrade notification level in stdout and multiple file mode */
+    if (!strcmp(output_filename,stdoutmark) && (displayLevel==2)) displayLevel=1;
+    if ((multiple_inputs) && (displayLevel==2)) displayLevel=1;
+
+    /* IO Stream/File */
+    LZ4IO_setNotificationLevel(displayLevel);
+    if (ifnIdx == 0) multiple_inputs = 0;
+    if (mode == om_decompress) {
+        if (multiple_inputs)
+            operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION);
+        else
+            operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename);
+    } else {   /* compression is default action */
+        if (legacy_format) {
+            DISPLAYLEVEL(3, "! Generating compressed LZ4 using Legacy format (deprecated) ! \n");
+            operationResult = LZ4IO_compressFilename_Legacy(input_filename, output_filename, cLevel);   /* propagate the result, like the other modes */
+        } else {
+            if (multiple_inputs)
+                operationResult = LZ4IO_compressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION, cLevel);
+            else
+                operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel);
+        }
+    }
+
+_cleanup:
+    if (main_pause) waitEnter();
+    free(dynNameSpace);   /* free(NULL) is a no-op */
+#ifdef UTIL_HAS_CREATEFILELIST
+    if (extendedFileList)
+        UTIL_freeFileList(extendedFileList, fileNamesBuf);
+    else
+#endif
+        free((void*)inFileNames);
+    return operationResult;
+}
diff --git a/lz4/lz4cli/lz4cli.mk b/lz4/lz4cli/lz4cli.mk
new file mode 100644
index 0000000..3ee2b46
--- /dev/null
+++ b/lz4/lz4cli/lz4cli.mk
@@ -0,0 +1,3 @@
+LZ4CLI_SOURCES = lz4cli.c lz4io.c bench.c datagen.c
+$(eval $(call program,lz4cli,lz4,$(LZ4CLI_SOURCES)))
+$(eval $(call set_compile_option,$(LZ4CLI_SOURCES),-Ijml/lz4))
diff --git a/lz4/lz4cli/lz4io.c b/lz4/lz4cli/lz4io.c
new file mode 100644
index 0000000..06741b4
--- /dev/null
+++ b/lz4/lz4cli/lz4io.c
@@ -0,0 +1,1045 @@
+/*
+  LZ4io.c - LZ4 File/Stream Interface
+  Copyright (C) Yann Collet 2011-2017
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - LZ4 source repository : https://github.com/lz4/lz4
+  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is a stand-alone program.
+  It is not part of LZ4 compression library, it is a user code of the LZ4 library.
+  - The license of LZ4 library is BSD.
+  - The license of xxHash library is BSD.
+  - The license of this source file is GPLv2.
+*/
+
+
+/*-************************************
+*  Compiler options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
+#endif
+#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
+#  define _POSIX_SOURCE 1          /* disable %llu warnings with MinGW on Windows */
+#endif
+
+
+/*****************************
+*  Includes
+*****************************/
+#include "platform.h"  /* Large File Support, SET_BINARY_MODE, SET_SPARSE_FILE_MODE, PLATFORM_POSIX_VERSION, __64BIT__ */
+#include "util.h"      /* UTIL_getFileStat, UTIL_setFileStat */
+#include <stdio.h>     /* fprintf, fopen, fread, stdin, stdout, fflush, getchar */
+#include <stdlib.h>    /* malloc, free */
+#include <string.h>    /* strerror, strcmp, strlen */
+#include <time.h>      /* clock */
+#include <sys/types.h> /* stat64 */
+#include <sys/stat.h>  /* stat64 */
+#include "lz4io.h"
+#include "lz4.h"       /* still required for legacy format */
+#include "lz4hc.h"     /* still required for legacy format */
+#include "lz4frame.h"
+
+
+/*****************************
+*  Constants
+*****************************/
+#define KB *(1 <<10)   /* postfix multipliers : `64 KB` expands to `64 *(1 <<10)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define _1BIT  0x01   /* masks keeping the lowest 1, 2, 3, 4 or 8 bits */
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define MAGICNUMBER_SIZE    4            /* size of a magic number, in bytes */
+#define LZ4IO_MAGICNUMBER   0x184D2204
+#define LZ4IO_SKIPPABLE0    0x184D2A50   /* first skippable magic number */
+#define LZ4IO_SKIPPABLEMASK 0xFFFFFFF0   /* ignores the low 4 bits when testing for a skippable magic number */
+#define LEGACY_MAGICNUMBER  0x184C2102   /* written as the header of legacy-format output */
+
+#define CACHELINE 64
+#define LEGACY_BLOCKSIZE   (8 MB)   /* amount of input read per block by the legacy compressor */
+#define MIN_STREAM_BUFSIZE (192 KB)
+#define LZ4IO_BLOCKSIZEID_DEFAULT 7   /* initial value of g_blockSizeId */
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)   /* every notification goes to stderr */
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static int g_displayLevel = 0;   /* 0 : no display  ; 1: errors  ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */
+/* DISPLAYUPDATE() : rate-limited DISPLAYLEVEL() for progress lines; elapsed time is (now - time of last update) */
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if (((clock_t)(clock() - g_time) > refreshRate) || (g_displayLevel>=4)) \
+            { g_time = clock(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stderr); } }
+static const clock_t refreshRate = CLOCKS_PER_SEC / 6;   /* minimum number of clock() ticks between two progress updates */
+static clock_t g_time = 0;   /* clock() value recorded at the last progress update */
+
+
+/**************************************
+*  Local Parameters
+**************************************/
+static int g_overwrite = 1;   /* 1 : an existing destination file is overwritten without asking */
+static int g_testMode = 0;
+static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;   /* valid values : 4-7, see LZ4IO_setBlockSizeID() */
+static int g_blockChecksum = 0;
+static int g_streamChecksum = 1;
+static int g_blockIndependence = 1;   /* 1 : LZ4IO_blockIndependent ; 0 : any other block mode */
+static int g_sparseFileSupport = 1;   /* 0 : disabled ; 1 : enabled, but automatically switched off when writing to stdout */
+static int g_contentSizeFlag = 0;
+
+
+/**************************************
+*  Exceptions
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0   /* default when the build does not define DEBUG */
+#endif
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);   /* prints only when DEBUG is non-zero */
+#define EXM_THROW(error, ...)   /* shows the message, then exit(error) */ \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, " \n");                                               \
+    exit(error);                                                          \
+}
+
+
+/**************************************
+*  Version modifiers
+**************************************/
+#define EXTENDED_ARGUMENTS   /* expands to nothing */
+#define EXTENDED_HELP        /* expands to nothing */
+#define EXTENDED_FORMAT      /* expands to nothing */
+#define DEFAULT_DECOMPRESSOR LZ4IO_decompressLZ4F
+
+
+/* ************************************************** */
+/* ****************** Parameters ******************** */
+/* ************************************************** */
+
+/* Default setting : overwrite = 1; any non-zero `yes` enables overwriting; return : overwrite mode (0/1) */
+int LZ4IO_setOverwrite(int yes)
+{
+   int const overwrite = yes ? 1 : 0;
+   return (g_overwrite = overwrite);
+}
+
+/* Default setting : testMode = 0; any non-zero `yes` enables test mode;
+ * return : testMode (0/1) */
+int LZ4IO_setTestMode(int yes)
+{
+   return (g_testMode = (yes ? 1 : 0));
+}
+
+/* blockSizeID : valid values : 4-5-6-7 (64 KB, 256 KB, 1 MB, 4 MB); return : selected block size, or 0 if bsid is out of range */
+size_t LZ4IO_setBlockSizeID(unsigned bsid)
+{
+    static const unsigned minBlockSizeID = 4;
+    static const unsigned maxBlockSizeID = 7;
+    static const size_t blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB };
+    if (!((bsid >= minBlockSizeID) && (bsid <= maxBlockSizeID))) return 0;   /* g_blockSizeId is left untouched */
+    g_blockSizeId = bsid;
+    return blockSizeTable[bsid - minBlockSizeID];
+}
+
+int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode)
+{   /* blocks are independent only for LZ4IO_blockIndependent; return : 1 if independent, 0 otherwise */
+    int const independent = (blockMode == LZ4IO_blockIndependent) ? 1 : 0;
+    return (g_blockIndependence = independent);
+}
+
+/* Default setting : no block checksum; any non-zero `enable` turns it on; return : block checksum mode (0/1) */
+int LZ4IO_setBlockChecksumMode(int enable)
+{
+    int const blockChecksum = enable ? 1 : 0;
+    return (g_blockChecksum = blockChecksum);
+}
+
+/* Default setting : checksum enabled; `enable`==0 turns it off;
+ * return : stream checksum mode (0/1) */
+int LZ4IO_setStreamChecksumMode(int enable)
+{
+    return (g_streamChecksum = (enable ? 1 : 0));
+}
+
+/* Default setting : 0 (no notification);
+ * return : the notification level now in use (== level) */
+int LZ4IO_setNotificationLevel(int level)
+{
+    return (g_displayLevel = level);
+}
+
+/* Default setting : 1 (enabled, except on stdout); enable==0 disables; enable!=0 forces sparse mode, even on stdout; return : 0 or 2 */
+int LZ4IO_setSparseFile(int enable)
+{
+    g_sparseFileSupport = 2*(enable!=0);   /* 2 == forced : LZ4IO_openDstFile() only auto-disables the value 1 on stdout */
+    return g_sparseFileSupport;
+}
+
+/* Default setting : 0 (disabled); any non-zero `enable` turns the content size flag on; return : flag value (0/1) */
+int LZ4IO_setContentSize(int enable)
+{
+    int const contentSizeFlag = enable ? 1 : 0;
+    return (g_contentSizeFlag = contentSizeFlag);
+}
+
+static U32 g_removeSrcFile = 0;   /* 0 (default) or 1, as set by LZ4IO_setRemoveSrcFile() */
+void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag != 0) ? 1 : 0; }
+
+
+
+/* ************************************************************************ **
+** ********************** LZ4 File / Pipe compression ********************* **
+** ************************************************************************ */
+
+static int LZ4IO_GetBlockSize_FromBlockId (int id) { int const shift = 8 + (id * 2); return 1 << shift; }   /* id 4 => 64 KB ... id 7 => 4 MB */
+static int LZ4IO_isSkippableMagicNumber(unsigned int magic) {
+    return LZ4IO_SKIPPABLE0 == (LZ4IO_SKIPPABLEMASK & magic);   /* 1 when only the low 4 bits differ from LZ4IO_SKIPPABLE0, else 0 */
+}
+
+
+/** LZ4IO_openSrcFile() :
+ * condition : `srcFileName` must be non-NULL.
+ * @result : stdin when `srcFileName` is stdinmark, else FILE* to `srcFileName`, or NULL if opening fails */
+static FILE* LZ4IO_openSrcFile(const char* srcFileName)
+{
+    FILE* f;
+
+    if (strcmp (srcFileName, stdinmark) != 0) {
+        f = fopen(srcFileName, "rb");
+        if ( f==NULL ) DISPLAYLEVEL(1, "%s: %s \n", srcFileName, strerror(errno));
+    } else {
+        DISPLAYLEVEL(4,"Using stdin for input\n");
+        f = stdin;
+        SET_BINARY_MODE(stdin);
+    }
+
+    return f;
+}
+
+/** LZ4IO_openDstFile() :
+ * condition : `dstFileName` must be non-NULL.
+ * @result : FILE* to `dstFileName` (stdout for stdoutmark), or NULL if opening fails or overwriting is refused */
+static FILE* LZ4IO_openDstFile(const char* dstFileName)
+{
+    FILE* f;
+
+    if (!strcmp (dstFileName, stdoutmark)) {
+        DISPLAYLEVEL(4,"Using stdout for output\n");
+        f = stdout;
+        SET_BINARY_MODE(stdout);
+        if (g_sparseFileSupport==1) {
+            g_sparseFileSupport = 0;
+            DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
+        }
+    } else {
+        if (!g_overwrite && strcmp (dstFileName, nulmark)) {  /* Check if destination file already exists */
+            f = fopen( dstFileName, "rb" );
+            if (f != NULL) {  /* dest exists, prompt for overwrite authorization */
+                fclose(f);
+                if (g_displayLevel <= 1) {  /* No interaction possible */
+                    DISPLAY("%s already exists; not overwritten  \n", dstFileName);
+                    return NULL;
+                }
+                DISPLAY("%s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
+                {   int const answer = getchar();   /* only the first character of the line decides */
+                    int ch = answer;
+                    while ((ch!=EOF) && (ch!='\n')) ch = getchar();  /* flush rest of input line first, so leftovers never answer the next prompt */
+                    if ((answer!='Y') && (answer!='y')) {
+                        DISPLAY("    not overwritten  \n");
+                        return NULL;
+        }   }   }   }
+        f = fopen( dstFileName, "wb" );
+        if (f==NULL) DISPLAYLEVEL(1, "%s: %s\n", dstFileName, strerror(errno));
+    }
+
+    /* sparse file */
+    if (f && g_sparseFileSupport) { SET_SPARSE_FILE_MODE(f); }
+
+    return f;
+}
+
+
+
+/***************************************
+*   Legacy Compression
+***************************************/
+
+/* plain byte-by-byte little-endian store; works whatever the host endianness & the alignment of `p` */
+static void LZ4IO_writeLE32 (void* p, unsigned value32)
+{
+    unsigned char* const out = (unsigned char*)p;
+    int byteNb;
+    for (byteNb = 0; byteNb < 4; byteNb++) {
+        out[byteNb] = (unsigned char)(value32 >> (8 * byteNb));   /* least significant byte first */
+    }
+}
+
+static int LZ4IO_LZ4_compress(const char* src, char* dst, int srcSize, int dstSize, int cLevel)
+{   /* adapter around LZ4_compress_fast() : cLevel is accepted but ignored, acceleration is always 1 */
+    int const acceleration = 1;
+    (void)cLevel; return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
+}
+
+/* LZ4IO_compressFilename_Legacy : intentionally "hidden" (not published in .h).
+ * Compresses input_filename into output_filename using the old 'legacy' format :
+ * magic number, then one [LE32 compressed size][compressed block] record per LEGACY_BLOCKSIZE chunk. Returns 0 ; every failure goes through EXM_THROW */
+int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel)
+{
+    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
+    unsigned long long filesize = 0;
+    unsigned long long compressedfilesize = MAGICNUMBER_SIZE;
+    char* in_buff;
+    char* out_buff;
+    const int outBuffSize = LZ4_compressBound(LEGACY_BLOCKSIZE);
+    FILE* finput;
+    FILE* foutput;
+    clock_t clockEnd;
+
+    /* Init */
+    clock_t const clockStart = clock();
+    compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
+
+    finput = LZ4IO_openSrcFile(input_filename);
+    if (finput == NULL) EXM_THROW(20, "%s : open file error ", input_filename);
+    foutput = LZ4IO_openDstFile(output_filename);
+    if (foutput == NULL) { fclose(finput); EXM_THROW(20, "%s : open file error ", output_filename); }   /* report the file that actually failed */
+
+    /* Allocate Memory : out_buff holds a 4-byte size field followed by up to outBuffSize compressed bytes */
+    in_buff = (char*)malloc(LEGACY_BLOCKSIZE);
+    out_buff = (char*)malloc((size_t)outBuffSize + 4);
+    if (!in_buff || !out_buff) EXM_THROW(21, "Allocation error : not enough memory");
+
+    /* Write Archive Header */
+    LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER);
+    { size_t const sizeCheck = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
+      if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header"); }
+
+    /* Main Loop : one iteration per input block, ends on the first empty read */
+    while (1) {
+        unsigned int outSize;
+        /* Read Block */
+        size_t const inSize = fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
+        if (inSize == 0) break;
+        if (inSize > LEGACY_BLOCKSIZE) EXM_THROW(23, "Read error : wrong fread() size report ");   /* should be impossible */
+        filesize += inSize;
+
+        /* Compress Block : payload goes after the 4 bytes reserved for its size */
+        outSize = compressionFunction(in_buff, out_buff+4, (int)inSize, outBuffSize, compressionlevel);
+        compressedfilesize += outSize+4;
+        DISPLAYUPDATE(2, "\rRead : %i MB  ==> %.2f%%   ",
+                (int)(filesize>>20), (double)compressedfilesize/filesize*100);
+
+        /* Write Block : size field and payload in a single fwrite */
+        LZ4IO_writeLE32(out_buff, outSize);
+        {   size_t const sizeCheck = fwrite(out_buff, 1, outSize+4, foutput);
+            if (sizeCheck!=(size_t)(outSize+4))
+                EXM_THROW(24, "Write error : cannot write compressed block");
+    }   }
+    if (ferror(finput)) EXM_THROW(25, "Error while reading %s ", input_filename);   /* an empty read may be an error rather than EOF */
+
+    /* Status */
+    clockEnd = clock();
+    if (clockEnd==clockStart) clockEnd+=1;  /* avoid division by zero (speed) */
+    filesize += !filesize;   /* avoid division by zero (ratio) */
+    DISPLAYLEVEL(2, "\r%79s\r", "");   /* blank line */
+    DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
+        filesize, compressedfilesize, (double)compressedfilesize / filesize * 100);
+    {   double const seconds = (double)(clockEnd - clockStart) / CLOCKS_PER_SEC;
+        DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds,
+                        (double)filesize / seconds / 1024 / 1024);
+    }
+
+    /* Close & Free */
+    free(in_buff);
+    free(out_buff);
+    fclose(finput);
+    fclose(foutput);
+
+    return 0;
+}
+
+
+/*********************************************
+*  Compression using Frame format
+*********************************************/
+
+typedef struct {
+    void*  srcBuffer;       /* input staging buffer : file data is read here before compression */
+    size_t srcBufferSize;
+    void*  dstBuffer;       /* receives compressed output before it is written to the destination file */
+    size_t dstBufferSize;
+    LZ4F_compressionContext_t ctx;   /* created by LZ4IO_createCResources(), released by LZ4IO_freeCResources() */
+} cRess_t;
+
+static cRess_t LZ4IO_createCResources(void)
+{   /* Build the reusable compression state : an LZ4F context, one input buffer of blockSize bytes and one output buffer. Any failure goes through EXM_THROW */
+    const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
+    cRess_t ress;
+
+    LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION);
+    if (LZ4F_isError(errorCode)) EXM_THROW(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
+
+    /* Allocate Memory */
+    ress.srcBuffer = malloc(blockSize);
+    ress.srcBufferSize = blockSize;
+    ress.dstBufferSize = LZ4F_compressFrameBound(blockSize, NULL);   /* cover worst case */
+    ress.dstBuffer = malloc(ress.dstBufferSize);
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
+
+    return ress;   /* returned by value ; the caller releases it with LZ4IO_freeCResources() */
+}
+
+static void LZ4IO_freeCResources(cRess_t ress)
+{   /* Release both buffers and the LZ4F context held by ress ; copies of ress must not be used afterwards */
+    free(ress.srcBuffer);
+    free(ress.dstBuffer);
+    { LZ4F_errorCode_t const errorCode = LZ4F_freeCompressionContext(ress.ctx);
+      if (LZ4F_isError(errorCode)) EXM_THROW(38, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode)); }
+}
+
+/*
+ * LZ4IO_compressFilename_extRess() : compress srcFileName into dstFileName as one LZ4 frame, using the caller-provided buffers and context in ress
+ * result : 0 : compression completed correctly
+ *          1 : srcFileName or dstFileName could not be opened ; any later failure goes through EXM_THROW
+ */
+static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, const char* dstFileName, int compressionLevel)
+{
+    unsigned long long filesize = 0;
+    unsigned long long compressedfilesize = 0;
+    FILE* srcFile;
+    FILE* dstFile;
+    void* const srcBuffer = ress.srcBuffer;
+    void* const dstBuffer = ress.dstBuffer;
+    const size_t dstBufferSize = ress.dstBufferSize;
+    const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
+    size_t readSize;
+    LZ4F_compressionContext_t ctx = ress.ctx;   /* just a pointer */
+    LZ4F_preferences_t prefs;
+
+    /* Init */
+    srcFile = LZ4IO_openSrcFile(srcFileName);
+    if (srcFile == NULL) return 1;
+    dstFile = LZ4IO_openDstFile(dstFileName);
+    if (dstFile == NULL) { fclose(srcFile); return 1; }
+    memset(&prefs, 0, sizeof(prefs));
+
+
+    /* Set compression parameters from the module-level settings */
+    prefs.autoFlush = 1;
+    prefs.compressionLevel = compressionLevel;
+    prefs.frameInfo.blockMode = (LZ4F_blockMode_t)g_blockIndependence;
+    prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)g_blockSizeId;
+    prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)g_blockChecksum;
+    prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)g_streamChecksum;
+    if (g_contentSizeFlag) {
+      U64 const fileSize = UTIL_getFileSize(srcFileName);
+      prefs.frameInfo.contentSize = fileSize;   /* == 0 if input == stdin */
+      if (fileSize==0)
+          DISPLAYLEVEL(3, "Warning : cannot determine input content size \n");
+    }
+
+    /* read first block */
+    readSize  = fread(srcBuffer, (size_t)1, blockSize, srcFile);
+    if (ferror(srcFile)) EXM_THROW(30, "Error reading %s ", srcFileName);
+    filesize += readSize;
+
+    /* single-block file : a short first read means the whole input is already in srcBuffer */
+    if (readSize < blockSize) {
+        /* Compress in single pass */
+        size_t const cSize = LZ4F_compressFrame(dstBuffer, dstBufferSize, srcBuffer, readSize, &prefs);
+        if (LZ4F_isError(cSize)) EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
+        compressedfilesize = cSize;
+        DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ",
+                      (unsigned)(filesize>>20), (double)compressedfilesize/(filesize+!filesize)*100);   /* avoid division by zero */
+
+        /* Write Block */
+        {   size_t const sizeCheck = fwrite(dstBuffer, 1, cSize, dstFile);
+            if (sizeCheck!=cSize) EXM_THROW(32, "Write error : cannot write compressed block");
+    }   }
+
+    else
+
+    /* multiple-blocks file : frame header, then one compressUpdate per block read, then end mark */
+    {
+        /* Write Archive Header */
+        size_t headerSize = LZ4F_compressBegin(ctx, dstBuffer, dstBufferSize, &prefs);
+        if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
+        { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
+          if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+        compressedfilesize += headerSize;
+
+        /* Main Loop : entered with the first block already loaded ; stops on the first empty read */
+        while (readSize>0) {
+            size_t outSize;
+
+            /* Compress Block */
+            outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL);
+            if (LZ4F_isError(outSize)) EXM_THROW(35, "Compression failed : %s", LZ4F_getErrorName(outSize));
+            compressedfilesize += outSize;
+            DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ", (unsigned)(filesize>>20), (double)compressedfilesize/filesize*100);
+
+            /* Write Block */
+            { size_t const sizeCheck = fwrite(dstBuffer, 1, outSize, dstFile);
+              if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); }
+
+            /* Read next block */
+            readSize  = fread(srcBuffer, (size_t)1, (size_t)blockSize, srcFile);
+            filesize += readSize;
+        }
+        if (ferror(srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);   /* the empty read may be an error rather than EOF */
+
+        /* End of Stream mark (headerSize is reused to hold its size) */
+        headerSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL);
+        if (LZ4F_isError(headerSize)) EXM_THROW(38, "End of file generation failed : %s", LZ4F_getErrorName(headerSize));
+
+        { size_t const sizeCheck = fwrite(dstBuffer, 1, headerSize, dstFile);
+          if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); }
+        compressedfilesize += headerSize;
+    }
+
+    /* Release files */
+    fclose (srcFile);
+    fclose (dstFile);
+
+    /* Copy owner, file permissions and modification time (skipped when either side is stdin/stdout) */
+    {   stat_t statbuf;
+        if (strcmp (srcFileName, stdinmark) && strcmp (dstFileName, stdoutmark) && UTIL_getFileStat(srcFileName, &statbuf))
+            UTIL_setFileStat(dstFileName, &statbuf);
+    }
+
+    if (g_removeSrcFile) { if (remove(srcFileName)) EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno)); } /* remove source file : --rm */
+
+    /* Final Status */
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
+        filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100);   /* avoid division by zero */
+
+    return 0;
+}
+
+
+int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel)
+{
+    clock_t const clockStart = clock();
+    cRess_t const resources = LZ4IO_createCResources();
+
+    /* 0 on success, 1 when source or destination could not be opened */
+    int const result = LZ4IO_compressFilename_extRess(resources, srcFileName, dstFileName, compressionLevel);
+
+    /* Buffers and context are only needed for this single file */
+    LZ4IO_freeCResources(resources);
+
+    /* Report elapsed processor time, resource setup and release included */
+    {   double const seconds = (double)(clock() - clockStart) / CLOCKS_PER_SEC;
+        DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
+    }
+
+    return result;
+}
+
+
+#define FNSPACE 30   /* initial capacity of the output-name buffer ; regrown on demand */
+int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionLevel)
+{   /* Compress each of the ifntSize inputs into "<name><suffix>" ; returns the nb of files that could not be processed (ifntSize on allocation failure) */
+    int i;
+    int missed_files = 0;
+    char* dstFileName = (char*)malloc(FNSPACE);
+    size_t ofnSize = FNSPACE;
+    const size_t suffixSize = strlen(suffix);
+    cRess_t ress;
+
+    if (dstFileName == NULL) return ifntSize;   /* not enough memory */
+    ress = LZ4IO_createCResources();
+
+    /* loop on each file ; the name buffer is regrown whenever name + suffix + '\0' would not fit (suffix length is caller-defined, so it must be part of the new size) */
+    for (i=0; i<ifntSize; i++) {
+        size_t const ifnSize = strlen(inFileNamesTable[i]);
+        if (ofnSize <= ifnSize+suffixSize+1) { free(dstFileName); ofnSize = ifnSize + suffixSize + 20; dstFileName = (char*)malloc(ofnSize); if (dstFileName==NULL) { LZ4IO_freeCResources(ress); return ifntSize; } }
+        strcpy(dstFileName, inFileNamesTable[i]);
+        strcat(dstFileName, suffix);
+
+        missed_files += LZ4IO_compressFilename_extRess(ress, inFileNamesTable[i], dstFileName, compressionLevel);
+    }
+
+    /* Close & Free */
+    LZ4IO_freeCResources(ress);
+    free(dstFileName);
+
+    return missed_files;
+}
+
+
+/* ********************************************************************* */
+/* ********************** LZ4 file-stream Decompression **************** */
+/* ********************************************************************* */
+
+static unsigned LZ4IO_readLE32 (const void* s)
+{   /* decode the 4 bytes at s as a little-endian 32-bit value, most significant byte first */
+    const unsigned char* const bytes = (const unsigned char*)s;
+    unsigned result = bytes[3];
+    result = (result << 8) | bytes[2];
+    result = (result << 8) | bytes[1];
+    result = (result << 8) | bytes[0];
+    return result;
+}
+
+
+static unsigned LZ4IO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
+{   /* Write buffer to file. With sparse support on, runs of zeros are not written : they are added to storedSkips and seeked over before the next real write. Returns the pending skip count (0 without sparse support) */
+    const size_t sizeT = sizeof(size_t);
+    const size_t maskT = sizeT -1 ;
+    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
+    const size_t* ptrT = bufferT;
+    size_t bufferSizeT = bufferSize / sizeT;   /* nb of whole size_t words ; the trailing bytes are handled after the main loop */
+    const size_t* const bufferTEnd = bufferT + bufferSizeT;
+    const size_t segmentSizeT = (32 KB) / sizeT;   /* the buffer is scanned in segments of 32 KB */
+
+    if (!g_sparseFileSupport) {  /* normal write */
+        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
+        if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block");
+        return 0;
+    }
+
+    /* avoid int overflow : consume 1 GB of pending skip right away once it grows past that */
+    if (storedSkips > 1 GB) {
+        int const seekResult = UTIL_fseek(file, 1 GB, SEEK_CUR);
+        if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)");
+        storedSkips -= 1 GB;
+    }
+
+    while (ptrT < bufferTEnd) {
+        size_t seg0SizeT = segmentSizeT;
+        size_t nb0T;
+
+        /* count leading zero words of this segment (the last segment may be shorter) */
+        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
+        bufferSizeT -= seg0SizeT;
+        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
+        storedSkips += (unsigned)(nb0T * sizeT);
+
+        if (nb0T != seg0SizeT) {   /* not all 0s */
+            errno = 0;
+            {   int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
+                if (seekResult) EXM_THROW(72, "Sparse skip error(%d): %s ; try --no-sparse", (int)errno, strerror(errno));
+            }
+            storedSkips = 0;
+            seg0SizeT -= nb0T;   /* from here on : nb of words left in the segment after the zero prefix */
+            ptrT += nb0T;
+            {   size_t const sizeCheck = fwrite(ptrT, sizeT, seg0SizeT, file);
+                if (sizeCheck != seg0SizeT) EXM_THROW(73, "Write error : cannot write decoded block");
+        }   }
+        ptrT += seg0SizeT;   /* all-zero segment : skips the whole segment ; otherwise : skips the part just written */
+    }
+
+    if (bufferSize & maskT) {  /* size not multiple of sizeT : implies end of block */
+        const char* const restStart = (const char*)bufferTEnd;
+        const char* restPtr = restStart;
+        size_t const restSize =  bufferSize & maskT;
+        const char* const restEnd = restStart + restSize;
+        for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;   /* same zero-prefix scan, byte by byte */
+        storedSkips += (unsigned) (restPtr - restStart);
+        if (restPtr != restEnd) {
+            int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
+            if (seekResult) EXM_THROW(74, "Sparse skip error ; try --no-sparse");
+            storedSkips = 0;
+            {   size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file);
+                if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(75, "Write error : cannot write decoded end of block");
+        }   }
+    }
+
+    return storedSkips;   /* zeros still pending : pass back into the next call, or to LZ4IO_fwriteSparseEnd() */
+}
+
+static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
+{   /* flush pending zeros : seek over all but the last one, then write that last zero byte for real */
+    static const char zeroByte = 0;
+    if (storedSkips == 0) return;   /* nothing pending (always the case without sparse support) */
+    if (UTIL_fseek(file, storedSkips-1, SEEK_CUR) != 0) {
+        EXM_THROW(69, "Final skip error (sparse file)\n");
+    }
+    if (fwrite(&zeroByte, 1, 1, file) != 1) {
+        EXM_THROW(69, "Write error : cannot write last zero\n");
+}   }
+
+
+static unsigned g_magicRead = 0;   /* out-parameter of LZ4IO_decodeLegacyStream() : a 4-byte value read as a block size but too large to be one ; consumed by selectDecoder() */
+static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput)
+{   /* Decode legacy-format blocks from finput into foutput until EOF or an implausible block size ; returns the nb of decoded bytes. Failures go through EXM_THROW */
+    unsigned long long streamSize = 0;
+    unsigned storedSkips = 0;
+
+    /* Allocate Memory */
+    char* const in_buff  = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE));
+    char* const out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
+    if (!in_buff || !out_buff) EXM_THROW(51, "Allocation error : not enough memory");
+
+    /* Main Loop */
+    while (1) {
+        unsigned int blockSize;
+
+        /* Block Size */
+        {   size_t const sizeCheck = fread(in_buff, 1, 4, finput);
+            if (sizeCheck == 0) break;                   /* Nothing to read : file read is completed */
+            if (sizeCheck != 4) EXM_THROW(52, "Read error : cannot access block size "); }
+        blockSize = LZ4IO_readLE32(in_buff);       /* stored as Little Endian */
+        if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) {
+            /* Cannot read next block : maybe new stream ? */
+            g_magicRead = blockSize;
+            break;
+        }
+
+        /* Read Block */
+        { size_t const sizeCheck = fread(in_buff, 1, blockSize, finput);
+          if (sizeCheck!=blockSize) EXM_THROW(52, "Read error : cannot access compressed block !"); }
+
+        /* Decode Block */
+        {   int const decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
+            if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !");
+            streamSize += decodeSize;
+            /* Write Block */
+            storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, decodeSize, storedSkips); /* success or die */
+    }   }
+    if (ferror(finput)) EXM_THROW(54, "Read error : ferror");   /* the empty read may be an error rather than EOF */
+
+    LZ4IO_fwriteSparseEnd(foutput, storedSkips);
+
+    /* Free */
+    free(in_buff);
+    free(out_buff);
+
+    return streamSize;
+}
+
+
+
+typedef struct {
+    void*  srcBuffer;       /* compressed input read from the source file */
+    size_t srcBufferSize;
+    void*  dstBuffer;       /* decoded output, before it is written to the destination file */
+    size_t dstBufferSize;
+    FILE*  dstFile;         /* NULL after creation ; set by the caller before decoding */
+    LZ4F_decompressionContext_t dCtx;   /* created by LZ4IO_createDResources(), released by LZ4IO_freeDResources() */
+} dRess_t;
+
+static const size_t LZ4IO_dBufferSize = 64 KB;   /* size of each of the two decompression buffers */
+static dRess_t LZ4IO_createDResources(void)
+{   /* Build the reusable decompression state : an LZ4F context plus input and output buffers. Any failure goes through EXM_THROW */
+    dRess_t ress;
+
+    /* init */
+    LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&ress.dCtx, LZ4F_VERSION);
+    if (LZ4F_isError(errorCode)) EXM_THROW(60, "Can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
+
+    /* Allocate Memory */
+    ress.srcBufferSize = LZ4IO_dBufferSize;
+    ress.srcBuffer = malloc(ress.srcBufferSize);
+    ress.dstBufferSize = LZ4IO_dBufferSize;
+    ress.dstBuffer = malloc(ress.dstBufferSize);
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
+
+    ress.dstFile = NULL;   /* no destination yet : the caller assigns it */
+    return ress;
+}
+
+static void LZ4IO_freeDResources(dRess_t ress)
+{   /* Release the LZ4F context, then both buffers ; ress.dstFile is not closed here */
+    LZ4F_errorCode_t errorCode = LZ4F_freeDecompressionContext(ress.dCtx);
+    if (LZ4F_isError(errorCode)) EXM_THROW(69, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode));
+    free(ress.srcBuffer);
+    free(ress.dstBuffer);
+}
+
+
+static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE* dstFile)
+{   /* Decode one LZ4 frame from srcFile (its magic number already consumed) into dstFile ; returns the nb of decoded bytes. Failures go through EXM_THROW */
+    unsigned long long filesize = 0;
+    LZ4F_errorCode_t nextToLoad;   /* value returned by LZ4F_decompress : used as the nb of bytes to read next ; 0 ends the frame */
+    unsigned storedSkips = 0;
+
+    /* Init feed with magic number (already consumed from FILE* sFile) */
+    {   size_t inSize = MAGICNUMBER_SIZE;
+        size_t outSize= 0;
+        LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
+        nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, NULL);
+        if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(nextToLoad));
+    }
+
+    /* Main Loop : one read per iteration, capped at the input buffer size */
+    for (;nextToLoad;) {
+        size_t readSize;
+        size_t pos = 0;
+        size_t decodedBytes = ress.dstBufferSize;
+
+        /* Read input */
+        if (nextToLoad > ress.srcBufferSize) nextToLoad = ress.srcBufferSize;
+        readSize = fread(ress.srcBuffer, 1, nextToLoad, srcFile);
+        if (!readSize) break;   /* reached end of file or stream */
+
+        while ((pos < readSize) || (decodedBytes == ress.dstBufferSize)) {  /* still to read, or still to flush */
+            /* Decode Input (at least partially) */
+            size_t remaining = readSize - pos;   /* in : bytes offered ; out : bytes actually consumed (added to pos below) */
+            decodedBytes = ress.dstBufferSize;   /* in : output capacity ; out : bytes produced */
+            nextToLoad = LZ4F_decompress(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
+            if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
+            pos += remaining;
+
+            /* Write Block (test mode decodes and counts but writes nothing) */
+            if (decodedBytes) {
+                if (!g_testMode)
+                    storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, storedSkips);
+                filesize += decodedBytes;
+                DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(filesize>>20));
+            }
+
+            if (!nextToLoad) break;
+        }
+    }
+    /* can be out because readSize == 0, which could be an fread() error */
+    if (ferror(srcFile)) EXM_THROW(67, "Read error");
+
+    if (!g_testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
+    if (nextToLoad!=0) EXM_THROW(68, "Unfinished stream");   /* input ended while the decoder still expected data */
+
+    return filesize;
+}
+
+
+#define PTSIZE  (64 KB)
+#define PTSIZET (PTSIZE / sizeof(size_t))
+static unsigned long long LZ4IO_passThrough(FILE* finput, FILE* foutput, unsigned char MNstore[MAGICNUMBER_SIZE])
+{   /* Copy finput to foutput unchanged, starting with the MAGICNUMBER_SIZE bytes in MNstore already read from it ; returns the nb of bytes copied, MNstore included */
+    size_t buffer[PTSIZET];   /* declared as size_t so it has the alignment LZ4IO_fwriteSparse() relies on */
+    size_t readBytes = 1;
+    unsigned long long total = MAGICNUMBER_SIZE;
+    unsigned storedSkips = 0;
+
+    size_t const sizeCheck = fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput);
+    if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(50, "Pass-through write error");
+
+    while (readBytes) {   /* ends on the first empty read */
+        readBytes = fread(buffer, 1, PTSIZE, finput);
+        total += readBytes;
+        storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, storedSkips);
+    }
+    if (ferror(finput)) EXM_THROW(51, "Read Error");
+
+    LZ4IO_fwriteSparseEnd(foutput, storedSkips);
+    return total;
+}
+
+
+/** Forward-only seek by an unsigned amount, split into steps of at most 1 GB so that each step fits in a long */
+static int fseek_u32(FILE *fp, unsigned offset, int where)
+{
+    unsigned const maxStep = 1U << 30;
+    unsigned remaining;
+
+    if (where != SEEK_CUR) return -1;  /* relative seeks only */
+    for (remaining = offset; remaining != 0; ) {
+        unsigned const step = (remaining < maxStep) ? remaining : maxStep;
+        int const seekError = UTIL_fseek(fp, (long) step, SEEK_CUR);
+        if (seekError != 0) return seekError;
+        remaining -= step;
+    }
+
+    return 0;
+}
+
+#define ENDOFSTREAM ((unsigned long long)-1)   /* sentinel returned by selectDecoder() when there is nothing more to decode */
+static unsigned long long selectDecoder(dRess_t ress, FILE* finput, FILE* foutput)
+{   /* Read the next magic number from finput and run the matching decoder ; returns the nb of bytes it produced (0 for a skippable frame), or ENDOFSTREAM */
+    unsigned char MNstore[MAGICNUMBER_SIZE];
+    unsigned magicNumber;
+    static unsigned nbFrames = 0;   /* frames seen so far in the current input ; reset to 0 on EOF and on pass-through */
+
+    /* init */
+    nbFrames++;
+
+    /* Check Archive Header */
+    if (g_magicRead) {  /* magic number already read from finput (see legacy frame)*/
+        magicNumber = g_magicRead;
+        g_magicRead = 0;
+    } else {
+        size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput);
+        if (nbReadBytes==0) { nbFrames = 0; return ENDOFSTREAM; }   /* EOF */
+        if (nbReadBytes != MAGICNUMBER_SIZE)
+          EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+        magicNumber = LZ4IO_readLE32(MNstore);   /* Little Endian format */
+    }
+    if (LZ4IO_isSkippableMagicNumber(magicNumber))
+        magicNumber = LZ4IO_SKIPPABLE0;   /* fold skippable magic numbers */
+
+    switch(magicNumber)
+    {
+    case LZ4IO_MAGICNUMBER:
+        return LZ4IO_decompressLZ4F(ress, finput, foutput);
+    case LEGACY_MAGICNUMBER:
+        DISPLAYLEVEL(4, "Detected : Legacy format \n");
+        return LZ4IO_decodeLegacyStream(finput, foutput);
+    case LZ4IO_SKIPPABLE0:
+        DISPLAYLEVEL(4, "Skipping detected skippable area \n");
+        {   size_t const nbReadBytes = fread(MNstore, 1, 4, finput);   /* 4-byte little-endian size of the area to skip */
+            if (nbReadBytes != 4)
+                EXM_THROW(42, "Stream error : skippable size unreadable");
+        }
+        {   unsigned const size = LZ4IO_readLE32(MNstore);
+            int const errorNb = fseek_u32(finput, size, SEEK_CUR);
+            if (errorNb != 0)
+                EXM_THROW(43, "Stream error : cannot skip skippable area");
+        }
+        return 0;   /* nothing decoded, but not the end of the stream either */
+    EXTENDED_FORMAT;  /* macro extension for custom formats */
+    default:
+        if (nbFrames == 1) {  /* just started */
+            /* Wrong magic number at the beginning of 1st stream */
+            if (!g_testMode && g_overwrite) {
+                nbFrames = 0;
+                return LZ4IO_passThrough(finput, foutput, MNstore);   /* copy the input as-is, the 4 bytes already read included */
+            }
+            EXM_THROW(44,"Unrecognized header : file cannot be decoded");
+        }
+        {   long int const position = ftell(finput);  /* only works for files < 2 GB */
+            DISPLAYLEVEL(2, "Stream followed by undecodable data ");
+            if (position != -1L)
+                DISPLAYLEVEL(2, "at position %i ", (int)position);
+            DISPLAYLEVEL(2, "\n");
+        }
+        return ENDOFSTREAM;   /* unknown data after at least one decoded frame : reported above, then decoding stops */
+    }
+}
+
+
+static int LZ4IO_decompressSrcFile(dRess_t ress, const char* input_filename, const char* output_filename)
+{   /* Decode every frame of input_filename into ress.dstFile, which the caller has already opened ; returns 1 if the input cannot be opened, 0 otherwise */
+    FILE* const foutput = ress.dstFile;
+    unsigned long long filesize = 0;
+
+    /* Init */
+    FILE* const finput = LZ4IO_openSrcFile(input_filename);
+    if (finput==NULL) return 1;
+
+    /* Loop over multiple streams */
+    for ( ; ; ) {  /* endless loop, see break condition */
+        unsigned long long const decodedSize =
+                        selectDecoder(ress, finput, foutput);
+        if (decodedSize == ENDOFSTREAM) break;
+        filesize += decodedSize;
+    }
+
+    /* Close input */
+    fclose(finput);
+    if (g_removeSrcFile) {  /* --rm */
+        if (remove(input_filename))
+            EXM_THROW(45, "Remove error : %s: %s", input_filename, strerror(errno));
+    }
+
+    /* Final Status */
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2, "%-20.20s : decoded %llu bytes \n", input_filename, filesize);
+    (void)output_filename;   /* unused : output goes to ress.dstFile */
+
+    return 0;
+}
+
+
+static int LZ4IO_decompressDstFile(dRess_t ress, const char* input_filename, const char* output_filename)
+{   /* Decode input_filename into a freshly opened output_filename ; returns 0 on success, 1 if either file could not be opened */
+    int result;
+    FILE* const foutput = LZ4IO_openDstFile(output_filename);
+    if (foutput==NULL) return 1;   /* failure */
+
+    ress.dstFile = foutput;
+    result = LZ4IO_decompressSrcFile(ress, input_filename, output_filename);   /* kept, so callers can count an unopenable input */
+    fclose(foutput);
+
+    /* Copy owner, file permissions and modification time */
+    {   stat_t statbuf;
+        if ( strcmp (input_filename, stdinmark)
+          && strcmp (output_filename, stdoutmark)
+          && strcmp (output_filename, nulmark)
+          && UTIL_getFileStat(input_filename, &statbuf) ) {
+            UTIL_setFileStat(output_filename, &statbuf);
+            /* should return value be read ? or is silent fail good enough ? */
+    }   }
+
+    return result;
+}
+
+
+int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename)
+{
+    dRess_t const resources = LZ4IO_createDResources();
+    clock_t const clockStart = clock();
+    int const result = LZ4IO_decompressDstFile(resources, input_filename, output_filename);
+
+    /* elapsed processor time covers decoding only, not resource setup or release */
+    {   double const seconds = (double)(clock() - clockStart) / CLOCKS_PER_SEC;
+        DISPLAYLEVEL(4, "Done in %.2f sec  \n", seconds);
+    }
+
+    LZ4IO_freeDResources(resources);
+    return result;
+}
+
+
+int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix)
+{   /* Decode each "<name><suffix>" input into "<name>", or everything into stdout when suffix is stdoutmark ; returns nb of missing + skipped files (ifntSize on allocation failure) */
+    int i;
+    int skippedFiles = 0;
+    int missingFiles = 0;
+    char* outFileName = (char*)malloc(FNSPACE);
+    size_t ofnSize = FNSPACE;
+    size_t const suffixSize = strlen(suffix);
+    dRess_t ress;
+
+    if (outFileName==NULL) return ifntSize;   /* not enough memory */
+    ress = LZ4IO_createDResources();   /* created only after the check above, so that early return leaks nothing */
+    ress.dstFile = LZ4IO_openDstFile(stdoutmark);
+
+    for (i=0; i<ifntSize; i++) {
+        size_t const ifnSize = strlen(inFileNamesTable[i]);
+        if (!strcmp(suffix, stdoutmark)) {
+            missingFiles += LZ4IO_decompressSrcFile(ress, inFileNamesTable[i], stdoutmark);
+            continue;
+        }
+        /* validate the extension first : only past this test is ifnSize-suffixSize known not to wrap around */
+        if (ifnSize <= suffixSize  ||  strcmp(inFileNamesTable[i] + ifnSize - suffixSize, suffix) != 0) {
+            DISPLAYLEVEL(1, "File extension doesn't match expected LZ4_EXTENSION (%4s); will not process file: %s\n", suffix, inFileNamesTable[i]);
+            skippedFiles++;
+            continue;
+        }
+        if (ofnSize <= ifnSize-suffixSize+1) { free(outFileName); ofnSize = ifnSize + 20; outFileName = (char*)malloc(ofnSize); if (outFileName==NULL) { LZ4IO_freeDResources(ress); return ifntSize; } }
+        memcpy(outFileName, inFileNamesTable[i], ifnSize - suffixSize);
+        outFileName[ifnSize-suffixSize] = '\0';
+        missingFiles += LZ4IO_decompressDstFile(ress, inFileNamesTable[i], outFileName);
+    }
+    LZ4IO_freeDResources(ress);
+    free(outFileName);
+    return missingFiles + skippedFiles;
+}
diff --git a/lz4/lz4cli/lz4io.h b/lz4/lz4cli/lz4io.h
new file mode 100644
index 0000000..6190f00
--- /dev/null
+++ b/lz4/lz4cli/lz4io.h
@@ -0,0 +1,101 @@
+/*
+  LZ4io.h - LZ4 File/Stream Interface
+  Copyright (C) Yann Collet 2011-2016
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - LZ4 source repository : https://github.com/lz4/lz4
+  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is a stand-alone program.
+  It is not part of the LZ4 compression library; it is user code of the LZ4 library.
+  - The license of LZ4 library is BSD.
+  - The license of xxHash library is BSD.
+  - The license of this source file is GPLv2.
+*/
+
+#ifndef LZ4IO_H_237902873
+#define LZ4IO_H_237902873
+
+/*---   Dependency   ---*/
+#include <stddef.h>   /* size_t */
+
+
+/* ************************************************** */
+/* Special input/output values                        */
+/* ************************************************** */
+#define NULL_OUTPUT "null"
+static const char stdinmark[]  = "stdin";
+static const char stdoutmark[] = "stdout";
+#ifdef _WIN32
+static const char nulmark[] = "nul";
+#else
+static const char nulmark[] = "/dev/null";
+#endif
+
+
+/* ************************************************** */
+/* ****************** Functions ********************* */
+/* ************************************************** */
+
+int LZ4IO_compressFilename  (const char* input_filename, const char* output_filename, int compressionlevel);   /* return : 0 if OK, 1 if a file could not be opened */
+int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename);
+/* Multiple-files variants : each output name is derived from the input name and `suffix` ; return : nb of files that were not processed */
+int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel);
+int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix);
+
+
+/* ************************************************** */
+/* ****************** Parameters ******************** */
+/* ************************************************** */
+
+/* Default setting : overwrite = 1;
+   return : overwrite mode (0/1) */
+int LZ4IO_setOverwrite(int yes);
+
+/* Default setting : testMode = 0;
+   return : testMode (0/1) */
+int LZ4IO_setTestMode(int yes);
+
+/* blockSizeID : valid values : 4-5-6-7
+   return : 0 if error, blockSize if OK */
+size_t LZ4IO_setBlockSizeID(unsigned blockSizeID);
+
+/* Default setting : independent blocks */
+typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t;
+int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode);
+
+/* Default setting : no block checksum */
+int LZ4IO_setBlockChecksumMode(int xxhash);
+
+/* Default setting : stream checksum enabled */
+int LZ4IO_setStreamChecksumMode(int xxhash);
+
+/* Default setting : 0 (no notification) */
+int LZ4IO_setNotificationLevel(int level);
+
+/* Default setting : 0 (disabled) */
+int LZ4IO_setSparseFile(int enable);
+
+/* Default setting : 0 (disabled) */
+int LZ4IO_setContentSize(int enable);
+
+void LZ4IO_setRemoveSrcFile(unsigned flag);
+
+
+#endif  /* LZ4IO_H_237902873 */
diff --git a/lz4/lz4cli/platform.h b/lz4/lz4cli/platform.h
new file mode 100644
index 0000000..66491b6
--- /dev/null
+++ b/lz4/lz4cli/platform.h
@@ -0,0 +1,154 @@
+/*
+    platform.h - compiler and OS detection
+    Copyright (C) 2016-present, Przemyslaw Skibinski, Yann Collet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef PLATFORM_H_MODULE
+#define PLATFORM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* **************************************
+*  Compiler Options
+****************************************/
+#if defined(_MSC_VER)
+#  define _CRT_SECURE_NO_WARNINGS   /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
+#  define _CRT_SECURE_NO_DEPRECATE  /* VS2005 - must be declared before <io.h> and <windows.h> */ 
+#  if (_MSC_VER <= 1800)            /* (1800 = Visual Studio 2013) */
+#    define snprintf sprintf_s      /* snprintf is unsupported by Visual Studio <= 2013, so calls are redirected to sprintf_s */
+#  endif
+#endif /* _MSC_VER */
+
+
+/* **************************************
+*  Detect 64-bit OS
+*  http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
+****************************************/
+#if defined __ia64 || defined _M_IA64                                                                               /* Intel Itanium */ \
+  || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__                                                /* POWER 64-bit */  \
+  || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__  /* SPARC 64-bit */  \
+  || defined __x86_64__ || defined _M_X64                                                                           /* x86 64-bit (was misspelled __x86_64__s) */ \
+  || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__                                           /* ARM 64-bit */    \
+  || (defined __mips  && (__mips == 64 || __mips == 4 || __mips == 3))                                              /* MIPS 64-bit */   \
+  || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */               \
+  || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */
+#  if !defined(__64BIT__)
+#    define __64BIT__  1            /* single flag tested by the rest of the headers for "64-bit target" */
+#  endif
+#endif
+
+
+/* *********************************************************
+*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
+***********************************************************/
+#if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
+#  if !defined(_FILE_OFFSET_BITS)                     /* keep any value already supplied by the build */
+#    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
+#  endif
+#  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
+#    define _LARGEFILE_SOURCE 1                       /* Large File Support extension (LFS) - fseeko, ftello */
+#  endif
+#  if defined(_AIX) || defined(__hpux)
+#    define _LARGE_FILES                              /* Large file support on 32-bits AIX and HP-UX */
+#  endif
+#endif /* !__64BIT__ || __MINGW32__ */
+
+
+/* ************************************************************
+*  Detect POSIX version
+*  PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
+*  PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
+*  PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
+***************************************************************/
+#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
+   || defined(__midipix__) || defined(__VMS))
+#  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
+     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
+#    define PLATFORM_POSIX_VERSION 200112L   /* hard-coded for these systems : <unistd.h> is not consulted */
+#  else
+#    if defined(__linux__) || defined(__linux)
+#      ifndef _POSIX_C_SOURCE
+#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#      endif
+#    endif
+#    include <unistd.h>  /* declares _POSIX_VERSION */
+#    if defined(_POSIX_VERSION)  /* POSIX compliant */
+#      define PLATFORM_POSIX_VERSION _POSIX_VERSION
+#    else
+#      define PLATFORM_POSIX_VERSION 0       /* Unix-like, but no POSIX version advertised */
+#    endif
+#  endif
+#endif
+#if !defined(PLATFORM_POSIX_VERSION)
+#  define PLATFORM_POSIX_VERSION -1          /* none of the Unix-like tests matched (e.g. Windows) */
+#endif
+
+
+/*-*********************************************
+*  Detect if isatty() and fileno() are available
+************************************************/
+#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
+#  include <unistd.h>   /* isatty */
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))   /* result of isatty() on the stream's descriptor */
+#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
+#  include <io.h>       /* _isatty */
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#elif defined(WIN32) || defined(_WIN32)
+#  include <io.h>      /* _isatty, _fileno, _get_osfhandle */
+#  include <windows.h> /* GetConsoleMode, HANDLE, DWORD */
+#  include <stdio.h>   /* FILE */
+static __inline int IS_CONSOLE(FILE* stdStream)
+{
+    DWORD dummy;   /* receives the console mode; the value itself is never read */
+    return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);   /* both tests must pass */
+}
+#else
+#  define IS_CONSOLE(stdStream) 0   /* no detection available : always reports "not a console" */
+#endif
+
+
+/******************************
+*  OS-specific Includes
+******************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
+#  include <fcntl.h>   /* _O_BINARY */
+#  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
+#  if !defined(__DJGPP__)
+#    include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+#    include <winioctl.h> /* FSCTL_SET_SPARSE */
+#    define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }   /* result of _setmode is deliberately discarded */
+#    define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }   /* return value not checked */
+#  else
+#    define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#    define SET_SPARSE_FILE_MODE(file)   /* expands to nothing under DJGPP */
+#  endif
+#else
+#  define SET_BINARY_MODE(file)          /* expands to nothing on the remaining platforms */
+#  define SET_SPARSE_FILE_MODE(file)     /* expands to nothing on the remaining platforms */
+#endif
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* PLATFORM_H_MODULE */
diff --git a/lz4/lz4cli/util.h b/lz4/lz4cli/util.h
new file mode 100644
index 0000000..5a69c55
--- /dev/null
+++ b/lz4/lz4cli/util.h
@@ -0,0 +1,494 @@
+/*
+    util.h - utility functions
+    Copyright (C) 2016-present, Przemyslaw Skibinski, Yann Collet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef UTIL_H_MODULE
+#define UTIL_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "platform.h"     /* PLATFORM_POSIX_VERSION */
+#include <stdlib.h>       /* malloc */
+#include <stddef.h>       /* size_t, ptrdiff_t */
+#include <stdio.h>        /* fprintf */
+#include <sys/types.h>    /* stat, utime */
+#include <sys/stat.h>     /* stat */
+#if defined(_MSC_VER)
+#  include <sys/utime.h>  /* utime */
+#  include <io.h>         /* _chmod */
+#else
+#  include <unistd.h>     /* chown, stat */
+#  include <utime.h>      /* utime */
+#endif
+#include <time.h>         /* time */
+#include <errno.h>
+
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/* ************************************************************
+* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
+***************************************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)   /* MSVC : use the 64-bit offset variant */
+#   define UTIL_fseek _fseeki64
+#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
+#  define UTIL_fseek fseeko
+#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS)
+#   define UTIL_fseek fseeko64
+#else
+#   define UTIL_fseek fseek                    /* fallback : plain fseek with a long offset */
+#endif
+
+
+/*-****************************************
+*  Sleep functions: Windows - Posix - others
+******************************************/
+#if defined(_WIN32)
+#  include <windows.h>
+#  define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
+#  define UTIL_sleep(s) Sleep(1000*(s))   /* argument parenthesized so expressions such as a+b scale correctly */
+#  define UTIL_sleepMilli(milli) Sleep(milli)
+#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */
+#  include <unistd.h>
+#  include <sys/resource.h> /* setpriority */
+#  include <time.h>         /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */
+#  if defined(PRIO_PROCESS)
+#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
+#  else
+#    define SET_REALTIME_PRIORITY /* disabled */
+#  endif
+#  define UTIL_sleep(s) sleep(s)
+#  if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L)  /* nanosleep requires POSIX.1-2001; tv_nsec must stay below 1e9, so whole seconds are carried into tv_sec */
+#      define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=(time_t)((milli)/1000); t.tv_nsec=(long)((milli)%1000)*1000000L; nanosleep(&t, NULL); }
+#  else
+#      define UTIL_sleepMilli(milli) /* disabled */
+#  endif
+#else
+#  define SET_REALTIME_PRIORITY      /* disabled */
+#  define UTIL_sleep(s)          /* disabled */
+#  define UTIL_sleepMilli(milli) /* disabled */
+#endif
+
+
+/* *************************************
+*  Constants
+***************************************/
+#define LIST_SIZE_INCREASE   (8*1024)
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(__INTEL_COMPILER)
+#  pragma warning(disable : 177)    /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */
+#endif
+#if defined(__GNUC__)
+#  define UTIL_STATIC static __attribute__((unused))   /* the attribute silences unused-function warnings for helpers a translation unit does not call */
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define UTIL_STATIC static inline
+#elif defined(_MSC_VER)
+#  define UTIL_STATIC static __inline
+#else
+#  define UTIL_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+*  Time functions
+******************************************/
+#if (PLATFORM_POSIX_VERSION >= 1)   /* POSIX : readings come from times(), the tick rate from sysconf(_SC_CLK_TCK) */
+#include <unistd.h>
+#include <sys/times.h>   /* times */
+   typedef U64 UTIL_time_t;
+   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=sysconf(_SC_CLK_TCK); }
+   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; *x = (UTIL_time_t)newTicks; }
+   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
+   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / ticksPerSecond; }
+#elif defined(_WIN32)   /* Windows : QueryPerformanceCounter readings, QueryPerformanceFrequency tick rate */
+   typedef LARGE_INTEGER UTIL_time_t;
+   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { if (!QueryPerformanceFrequency(ticksPerSecond)) fprintf(stderr, "ERROR: QueryPerformance not present\n"); }
+   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { QueryPerformanceCounter(x); }
+   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
+   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; }
+#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
+   typedef clock_t UTIL_time_t;
+   UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=0; }
+   UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); }
+   UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+   UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+#endif   /* in this last variant ticksPerSecond is ignored : spans are scaled by CLOCKS_PER_SEC */
+
+
+/* microseconds elapsed between clockStart and the moment of the call */
+UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart, UTIL_time_t ticksPerSecond )
+{
+    UTIL_time_t now;
+    UTIL_getTime(&now);   /* the span ends at the current timer reading */
+    return UTIL_getSpanTimeMicro(ticksPerSecond, clockStart, now);
+}
+
+
+UTIL_STATIC void UTIL_waitForNextTick(UTIL_time_t ticksPerSecond)
+{
+    UTIL_time_t entryTime, now;
+    UTIL_getTime(&entryTime);
+    /* spin until the nanosecond span measured from the entry reading becomes non-zero */
+    do UTIL_getTime(&now);
+    while (UTIL_getSpanTimeNano(ticksPerSecond, entryTime, now) == 0);
+}
+
+
+
+/*-****************************************
+*  File functions
+******************************************/
+#if defined(_MSC_VER)
+    #define chmod _chmod              /* lets the code below spell the call as chmod() under MSVC */
+    typedef struct __stat64 stat_t;   /* the structure filled by _stat64() */
+#else
+    typedef struct stat stat_t;       /* the structure filled by stat() */
+#endif
+
+
+UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
+{
+    int nbErrors = 0;   /* negated sum of the return codes of the calls below */
+    struct utimbuf stamps;
+
+    stamps.actime = time(NULL);             /* access time : now */
+    stamps.modtime = statbuf->st_mtime;     /* modification time : taken from statbuf */
+    nbErrors -= utime(filename, &stamps);
+
+#if !defined(_WIN32)
+    nbErrors -= chown(filename, statbuf->st_uid, statbuf->st_gid);  /* replicate owner and group */
+#endif
+
+    nbErrors -= chmod(filename, statbuf->st_mode & 07777);  /* replicate the permission bits */
+
+    errno = 0;   /* whatever the calls above left in errno is cleared */
+    return nbErrors;
+}
+
+
+UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
+{
+#if defined(_MSC_VER)
+    int const failed = _stat64(infilename, statbuf);
+    int const regular = !failed && (statbuf->st_mode & S_IFREG);
+#else
+    int const failed = stat(infilename, statbuf);
+    int const regular = !failed && S_ISREG(statbuf->st_mode);
+#endif
+    /* 1 only when the stat call succeeded and the mode marks a regular file; statbuf is filled as a side effect */
+    return regular ? 1 : 0;
+}
+
+
+UTIL_STATIC int UTIL_isRegFile(const char* infilename)
+{
+    stat_t ignored;   /* filled by the helper, then discarded : only its verdict is returned */
+    return UTIL_getFileStat(infilename, &ignored);
+}
+
+
+UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
+{
+    /* Answers 1 only when the path can be stat'ed and its mode carries the
+     * directory type; a failed stat call yields 0. */
+    stat_t info;
+#if defined(_MSC_VER)
+    if (_stat64(infilename, &info) != 0) return 0;
+    return (info.st_mode & _S_IFDIR) ? 1 : 0;
+#else
+    if (stat(infilename, &info) != 0) return 0;
+    return S_ISDIR(info.st_mode) ? 1 : 0;
+#endif
+}
+
+
+UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
+{
+    /* Each platform branch uses its own stat variant; 0 is returned when the call fails or the path is not a regular file. */
+#if defined(_MSC_VER)
+    struct __stat64 info;
+    if (_stat64(infilename, &info) != 0) return 0;      /* stat failed */
+    if (!(info.st_mode & S_IFREG)) return 0;            /* not a regular file */
+#elif defined(__MINGW32__) && defined (__MSVCRT__)
+    struct _stati64 info;
+    if (_stati64(infilename, &info) != 0) return 0;     /* stat failed */
+    if (!(info.st_mode & S_IFREG)) return 0;            /* not a regular file */
+#else
+    struct stat info;
+    if (stat(infilename, &info) != 0) return 0;         /* stat failed */
+    if (!S_ISREG(info.st_mode)) return 0;               /* not a regular file */
+#endif
+    return (U64)info.st_size;
+}
+
+
+UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
+{
+    U64 sum = 0;   /* running total of UTIL_getFileSize() over the table */
+    unsigned idx = 0;
+    while (idx < nbFiles)
+        sum += UTIL_getFileSize(fileNamesTable[idx++]);
+    return sum;
+}
+
+
+/*
+ * realloc() variant that does not leave the old block behind :
+ * when the resize fails, the original allocation is freed and NULL is returned.
+*/
+UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
+{
+    void *resized = realloc(ptr, size);
+    if (resized == NULL)
+        free(ptr);   /* nothing to hand back : release the source block */
+    return resized;
+}
+
+
+#ifdef _WIN32
+#  define UTIL_HAS_CREATEFILELIST
+
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+{   /* Appends every file found under dirName (recursing into sub-directories) to the list held at *bufStart. */
+    char* path;
+    int dirLength, fnameLength, pathLength, nbFiles = 0;
+    WIN32_FIND_DATAA cFile;
+    HANDLE hFile;
+    /* Entries are consecutive NUL-terminated paths and *pos is the write offset. Returns the number of files added; *bufStart is NULL after a failed growth. */
+    dirLength = (int)strlen(dirName);
+    path = (char*) malloc(dirLength + 3);
+    if (!path) return 0;
+    /* search pattern : "<dirName>\*" */
+    memcpy(path, dirName, dirLength);
+    path[dirLength] = '\\';
+    path[dirLength+1] = '*';
+    path[dirLength+2] = 0;
+
+    hFile=FindFirstFileA(path, &cFile);
+    free(path);   /* the pattern is released on both outcomes; it used to leak when the search failed */
+    if (hFile == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Cannot open directory '%s'\n", dirName);
+        return 0;
+    }
+
+    do {
+        fnameLength = (int)strlen(cFile.cFileName);
+        path = (char*) malloc(dirLength + fnameLength + 2);
+        if (!path) { FindClose(hFile); return 0; }
+        memcpy(path, dirName, dirLength);
+        path[dirLength] = '\\';
+        memcpy(path+dirLength+1, cFile.cFileName, fnameLength);
+        pathLength = dirLength+1+fnameLength;
+        path[pathLength] = 0;
+        if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+            if (strcmp (cFile.cFileName, "..") == 0 ||
+                strcmp (cFile.cFileName, ".") == 0) { free(path); continue; }   /* skip self/parent entries without leaking path */
+
+            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd);  /* Recursively call "UTIL_prepareFileList" with the new path. */
+            if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
+        }
+        else if ((cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED)) {
+            if (*bufStart + *pos + pathLength >= *bufEnd) {
+                ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
+                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
+                if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
+                *bufEnd = *bufStart + newListSize;   /* computed only once the new block is known to be valid */
+            }
+            if (*bufStart + *pos + pathLength < *bufEnd) {
+                strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
+                *pos += pathLength + 1;
+                nbFiles++;
+            }
+        }
+        free(path);
+    } while (FindNextFileA(hFile, &cFile));
+
+    FindClose(hFile);
+    return nbFiles;
+}
+
+#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L)  /* opendir, readdir require POSIX.1-2001 */
+#  define UTIL_HAS_CREATEFILELIST
+#  include <dirent.h>       /* opendir, readdir */
+#  include <string.h>       /* strerror, memcpy */
+
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+{   /* Appends every non-directory entry found under dirName (recursing into sub-directories) to the list held at *bufStart. */
+    DIR *dir;
+    struct dirent *entry;
+    char* path;
+    int dirLength, fnameLength, pathLength, nbFiles = 0;
+    /* Entries are consecutive NUL-terminated paths and *pos is the write offset. Returns the number of files added; *bufStart is NULL after a failed growth or a readdir error. */
+    if (!(dir = opendir(dirName))) {
+        fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
+        return 0;
+    }
+
+    dirLength = (int)strlen(dirName);
+    errno = 0;   /* readdir() signals failure only through errno, so it must start clean */
+    while ((entry = readdir(dir)) != NULL) {
+        if (strcmp (entry->d_name, "..") == 0 ||
+            strcmp (entry->d_name, ".") == 0) continue;
+        fnameLength = (int)strlen(entry->d_name);
+        path = (char*) malloc(dirLength + fnameLength + 2);
+        if (!path) { closedir(dir); return 0; }
+        memcpy(path, dirName, dirLength);
+        path[dirLength] = '/';
+        memcpy(path+dirLength+1, entry->d_name, fnameLength);
+        pathLength = dirLength+1+fnameLength;
+        path[pathLength] = 0;
+
+        if (UTIL_isDirectory(path)) {
+            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd);  /* Recursively call "UTIL_prepareFileList" with the new path. */
+            if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
+        } else {
+            if (*bufStart + *pos + pathLength >= *bufEnd) {
+                ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
+                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
+                if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
+                *bufEnd = *bufStart + newListSize;   /* computed only once the new block is known to be valid */
+            }
+            if (*bufStart + *pos + pathLength < *bufEnd) {
+                strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
+                *pos += pathLength + 1;
+                nbFiles++;
+            }
+        }
+        free(path);
+        errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
+    }
+    /* a NULL from readdir() with errno set is an error, not the end of the directory */
+    if (errno != 0) {
+        fprintf(stderr, "readdir(%s) error: %s\n", dirName, strerror(errno));
+        free(*bufStart);
+        *bufStart = NULL;
+    }
+    closedir(dir);
+    return nbFiles;
+}
+
+#else
+
+UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+{   /* no directory-scanning backend in this build : warn on stderr and add no file */
+    fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
+    (void)pos; (void)bufStart; (void)bufEnd;   /* list arguments are intentionally untouched */
+    return 0;
+}
+
+#endif /* #ifdef _WIN32 */
+
+/*
+ * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
+ *                       and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
+ * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
+ * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
+ */
+UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb)
+{
+    size_t pos;
+    unsigned i, nbFiles;
+    char* buf = (char*)malloc(LIST_SIZE_INCREASE);
+    char* bufend = buf ? buf + LIST_SIZE_INCREASE : NULL;   /* no pointer arithmetic on a failed allocation */
+    const char** fileTable;
+
+    if (!buf) return NULL;
+    /* pass 1 : pack every name into buf as consecutive NUL-terminated strings */
+    for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
+        if (!UTIL_isDirectory(inputNames[i])) {
+            size_t const len = strlen(inputNames[i]);
+            if (buf + pos + len >= bufend) {
+                ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
+                buf = (char*)UTIL_realloc(buf, newListSize);
+                if (!buf) return NULL;   /* UTIL_realloc already freed the old block */
+                bufend = buf + newListSize;
+            }
+            if (buf + pos + len < bufend) {
+                strncpy(buf + pos, inputNames[i], bufend - (buf + pos));
+                pos += len + 1;
+                nbFiles++;
+            }
+        } else {
+            nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend);
+            if (buf == NULL) return NULL;
+    }   }
+
+    if (nbFiles == 0) { free(buf); return NULL; }
+    /* pass 2 : build the pointer table by walking the packed strings */
+    fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
+    if (!fileTable) { free(buf); return NULL; }
+
+    for (i=0, pos=0; i<nbFiles; i++) {
+        fileTable[i] = buf + pos;
+        pos += strlen(fileTable[i]) + 1;
+    }
+    /* consistency check : the walk must not have run past the buffer */
+    if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
+
+    *allocatedBuffer = buf;
+    *allocatedNamesNb = nbFiles;
+
+    return fileTable;
+}
+
+
+UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
+{
+    free(allocatedBuffer);          /* free(NULL) is a no-op, so no guard is needed */
+    free((void*)filenameTable);     /* cast drops the const qualifier for free() */
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* UTIL_H_MODULE */
diff --git a/lz4/lz4frame.c b/lz4/lz4frame.c
new file mode 100644
index 0000000..3408708
--- /dev/null
+++ b/lz4/lz4frame.c
@@ -0,0 +1,1669 @@
+/*
+LZ4 auto-framing library
+Copyright (C) 2011-2016, Yann Collet.
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- LZ4 homepage : http://www.lz4.org
+- LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+*  in full conformance with specification v1.5.0
+*  All related operations, including memory management, are handled by the library.
+* */
+
+
+/*-************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/*-************************************
+*  Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(s)   calloc(1,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy, memmove */
+#define MEM_INIT       memset
+
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4frame_static.h"
+#include "lz4.h"
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/*-************************************
+*  Debug
+**************************************/
+#define LZ4F_STATIC_ASSERT(c)    { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/*-************************************
+*  Basic Types
+**************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/* unoptimized byte-wise accessors; they sidestep endianness & alignment issues */
+static U32 LZ4F_readLE32 (const void* src)
+{
+    const BYTE* const p = (const BYTE*)src;
+    /* p[0] is the least-significant byte, p[3] the most-significant one */
+    return  (U32)p[0]
+         | ((U32)p[1] <<  8)
+         | ((U32)p[2] << 16)
+         | ((U32)p[3] << 24);
+}
+
+static void LZ4F_writeLE32 (void* dst, U32 value32)
+{
+    BYTE* const out = (BYTE*)dst;
+    int n;
+    /* store 4 bytes, least-significant first */
+    for (n = 0; n < 4; n++)
+        out[n] = (BYTE)(value32 >> (8*n));
+}
+
+static U64 LZ4F_readLE64 (const void* src)
+{
+    const BYTE* const in = (const BYTE*)src;
+    U64 result = 0;
+    int n;
+
+    /* fold the 8 bytes from most- to least-significant :
+     * in[0] ends up in the low 8 bits (little-endian layout) */
+    for (n = 7; n >= 0; n--) {
+        result = (result << 8) | in[n];
+    }
+    return result;
+}
+
+static void LZ4F_writeLE64 (void* dst, U64 value64)
+{
+    BYTE* const out = (BYTE*)dst;
+    int n;
+
+    /* peel off one byte per iteration, least-significant first;
+     * value64 is a by-value copy, so consuming it is harmless */
+    for (n = 0; n < 8; n++) {
+        out[n] = (BYTE)value64;
+        value64 >>= 8;
+    }
+}
+
+
+/*-************************************
+*  Constants
+**************************************/
+#define KB *(1<<10)   /* postfix multiplier : "64 KB" expands to 64 *(1<<10) */
+#define MB *(1<<20)   /* postfix multiplier, same usage as KB */
+#define GB *(1<<30)   /* postfix multiplier, same usage as KB */
+
+#define _1BIT  0x01   /* masks covering the low 1, 2, 3, 4 and 8 bits */
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U   /* top bit of a 32-bit word */
+#define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB     /* substituted when a block size ID of 0 is supplied */
+
+static const size_t minFHSize = 7;   /* NOTE(review): presumably the minimum frame header size in bytes - confirm */
+static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX;   /* 19 */
+static const size_t BHSize = 4;      /* NOTE(review): presumably the block header size in bytes - confirm */
+
+
+/*-************************************
+*  Structures and local types
+**************************************/
+typedef struct LZ4F_cctx_s
+{
+    LZ4F_preferences_t prefs;
+    U32    version;
+    U32    cStage;
+    const LZ4F_CDict* cdict;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    BYTE*  tmpBuff;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    U64    totalInSize;
+    XXH32_state_t xxh;
+    void*  lz4CtxPtr;
+    U32    lz4CtxLevel;   /* 0: unallocated;  1: LZ4_stream_t;  3: LZ4_streamHC_t */
+} LZ4F_cctx_t;
+
+
+/*-************************************
+*  Error management
+**************************************/
+#define LZ4F_GENERATE_STRING(STRING) #STRING,   /* stringifies one list entry and appends the separating comma */
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };   /* one name per entry of LZ4F_LIST_ERRORS, in list order */
+
+
+unsigned LZ4F_isError(LZ4F_errorCode_t code)
+{   /* non-zero when code lies strictly above -(LZ4F_ERROR_maxCode) converted to LZ4F_errorCode_t */
+    return ((LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode) < code);
+}
+
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    static const char* fallbackName = "Unspecified error code";
+    /* an error value is a negated table index; anything else gets the generic text */
+    return LZ4F_isError(code) ? LZ4F_errorStrings[-(int)(code)] : fallbackName;
+}
+
+LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult)
+{   /* converts a function result into its LZ4F_errorCodes value ; non-error results map to LZ4F_OK_NoError */
+    if (!LZ4F_isError(functionResult)) return LZ4F_OK_NoError;
+    return (LZ4F_errorCodes)(-(ptrdiff_t)functionResult);
+}
+
+static LZ4F_errorCode_t err0r(LZ4F_errorCodes code)
+{   /* encodes an LZ4F_errorCodes value as a function result, by negating it */
+    /* A compilation error here means sizeof(ptrdiff_t) is not large enough */
+    LZ4F_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
+    return (LZ4F_errorCode_t)-(ptrdiff_t)code;
+}
+
+unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; }   /* LZ4F_VERSION as seen when this file was compiled */
+
+int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; }   /* reports LZ4HC_CLEVEL_MAX */
+
+
+/*-************************************
+*  Private functions
+**************************************/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )   /* note : the selected argument is evaluated twice */
+
+static size_t LZ4F_getBlockSize(unsigned blockSizeID)
+{   /* @return : block size in bytes for IDs 4 to 7 (0 selects the default ID), or an error code otherwise */
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+
+    if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    blockSizeID -= 4;   /* unsigned : IDs below 4 wrap around to huge values and are rejected by the next test */
+    if (blockSizeID > 3) return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+    return blockSizes[blockSizeID];
+}
+
+static BYTE LZ4F_headerChecksum (const void* header, size_t length)
+{   /* header checksum = second byte of the XXH32 hash (seed 0) of the `length` bytes starting at `header` */
+    U32 const xxh = XXH32(header, length, 0);
+    return (BYTE)(xxh >> 8);
+}
+
+
+/*-************************************
+*  Simple-pass compression functions
+**************************************/
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID,
+                                           const size_t srcSize)
+{   /* @return : smallest block size ID able to hold srcSize in a single block, never above requestedBSID */
+    LZ4F_blockSizeID_t proposedBSID = LZ4F_max64KB;
+    size_t maxBlockSize = 64 KB;
+    while (requestedBSID > proposedBSID) {   /* requests <= LZ4F_max64KB (including 0) are returned unchanged */
+        if (srcSize <= maxBlockSize)
+            return proposedBSID;
+        proposedBSID = (LZ4F_blockSizeID_t)((int)proposedBSID + 1);
+        maxBlockSize <<= 2;   /* the next candidate is 4x larger */
+    }
+    return requestedBSID;
+}
+
+/*! LZ4F_compressBound_internal() :
+ *  Provides the dstCapacity required for a given srcSize to guarantee operation success in worst case situations.
+ *  preferencesPtr is optional : if NULL, preferences are set to cover the worst case ; alreadyBuffered is capped at blockSize-1.
+ * @return is always the same for identical arguments, so it can be relied upon to size reusable buffers.
+ *  When srcSize==0, the result is an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ */
+static size_t LZ4F_compressBound_internal(size_t srcSize,
+                                    const LZ4F_preferences_t* preferencesPtr,
+                                          size_t alreadyBuffered)
+{
+    LZ4F_preferences_t prefsNull;
+    memset(&prefsNull, 0, sizeof(prefsNull));
+    prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;   /* worst case */
+    {   const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+        U32 const flush = prefsPtr->autoFlush | (srcSize==0);   /* a trailing partial block is only counted when flushing */
+        LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
+        size_t const blockSize = LZ4F_getBlockSize(blockID);
+        size_t const maxBuffered = blockSize - 1;
+        size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered);
+        size_t const maxSrcSize = srcSize + bufferedSize;
+        unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize);
+        size_t const partialBlockSize = (srcSize - (srcSize==0)) & (blockSize-1);   /* 0 => -1 == MAX => blockSize-1 ; NOTE(review): derived from srcSize alone, not maxSrcSize - confirm buffered bytes are covered */
+        size_t const lastBlockSize = flush ? partialBlockSize : 0;
+        unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
+
+        size_t const blockHeaderSize = 4;
+        size_t const blockCRCSize = 4 * prefsPtr->frameInfo.blockChecksumFlag;
+        size_t const frameEnd = 4 + (prefsPtr->frameInfo.contentChecksumFlag*4);   /* endMark + optional content checksum */
+
+        return ((blockHeaderSize + blockCRCSize) * nbBlocks) +
+               (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
+    }
+}
+
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{   /* @return : worst case size of a complete frame (largest header + blocks + frame end) for srcSize bytes of input */
+    LZ4F_preferences_t prefs;
+    size_t const headerSize = maxFHSize;      /* max header size, including optional fields */
+
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;
+    else memset(&prefs, 0, sizeof(prefs));
+    prefs.autoFlush = 1;   /* same setting as forced by LZ4F_compressFrame_usingCDict() */
+
+    return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);;
+}
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress srcBuffer using a dictionary, in a single step.
+ *  cdict can be NULL, in which case, no dictionary is used.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr), otherwise the function fails.
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide a dictID, so it's not recommended.
+ * @return : number of bytes written into dstBuffer,
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity,
+                               const void* srcBuffer, size_t srcSize,
+                               const LZ4F_CDict* cdict,
+                               const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_cctx_t cctxI;
+    LZ4_stream_t lz4ctx;
+    LZ4F_preferences_t prefs;
+    LZ4F_compressOptions_t options;
+    BYTE* const dstStart = (BYTE*) dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* const dstEnd = dstStart + dstCapacity;
+
+    memset(&cctxI, 0, sizeof(cctxI));
+    cctxI.version = LZ4F_VERSION;
+    cctxI.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+
+    if (preferencesPtr!=NULL)
+        prefs = *preferencesPtr;
+    else
+        memset(&prefs, 0, sizeof(prefs));
+    if (prefs.frameInfo.contentSize != 0)
+        prefs.frameInfo.contentSize = (U64)srcSize;   /* auto-correct content size if selected (!=0) */
+
+    prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);   /* shrink block size when input is small */
+    prefs.autoFlush = 1;
+    if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+        prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* only one block => no need for inter-block link */
+
+    if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+        cctxI.lz4CtxPtr = &lz4ctx;
+        cctxI.lz4CtxLevel = 1;
+    }  /* fast compression context pre-created on stack */
+
+    memset(&options, 0, sizeof(options));
+    options.stableSrc = 1;
+
+    if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs))  /* condition to guarantee success */
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+
+    { size_t const headerSize = LZ4F_compressBegin_usingCDict(&cctxI, dstBuffer, dstCapacity, cdict, &prefs);  /* write header */
+      if (LZ4F_isError(headerSize)) return headerSize;
+      dstPtr += headerSize;   /* header size */ }
+
+    { size_t const cSize = LZ4F_compressUpdate(&cctxI, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
+      if (LZ4F_isError(cSize)) return cSize;
+      dstPtr += cSize; }
+
+    { size_t const tailSize = LZ4F_compressEnd(&cctxI, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+      if (LZ4F_isError(tailSize)) return tailSize;
+      dstPtr += tailSize; }
+
+    if (prefs.compressionLevel >= LZ4HC_CLEVEL_MIN)  /* Ctx allocation only for lz4hc ; NOTE(review): the early error returns above skip this release */
+        FREEMEM(cctxI.lz4CtxPtr);
+
+    return (dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame, in a single step, without dictionary.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                    const void* srcBuffer, size_t srcSize,
+                    const LZ4F_preferences_t* preferencesPtr)
+{
+    return LZ4F_compressFrame_usingCDict(dstBuffer, dstCapacity,
+                                         srcBuffer, srcSize,
+                                         NULL, preferencesPtr);   /* NULL : no dictionary */
+}
+
+
+/*-***************************************************
+*   Dictionary compression
+*****************************************************/
+
+struct LZ4F_CDict_s {
+    void* dictContent;        /* private copy of the dictionary (last 64 KB at most) */
+    LZ4_stream_t* fastCtx;    /* fast-mode stream, pre-loaded with dictContent */
+    LZ4_streamHC_t* HCCtx;    /* HC stream, pre-loaded with dictContent at LZ4HC_CLEVEL_DEFAULT */
+}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */
+
+/*! LZ4F_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content (last 64 KB at most) is copied within CDict
+ * @return : digested dictionary for compression, or NULL if failed ; release it with LZ4F_freeCDict() */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
+{
+    const char* dictStart = (const char*)dictBuffer;
+    LZ4F_CDict* cdict = (LZ4F_CDict*) ALLOCATOR(sizeof(*cdict));   /* same allocator family as the FREEMEM() in LZ4F_freeCDict() */
+    if (!cdict) return NULL;
+    if (dictSize > 64 KB) {   /* only the last 64 KB of the dictionary are kept */
+        dictStart += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    cdict->dictContent = ALLOCATOR(dictSize);
+    cdict->fastCtx = LZ4_createStream();
+    cdict->HCCtx = LZ4_createStreamHC();
+    if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {   /* all three members are assigned at this point, so a full release is safe */
+        LZ4F_freeCDict(cdict);
+        return NULL;
+    }
+    memcpy(cdict->dictContent, dictStart, dictSize);
+    LZ4_resetStream(cdict->fastCtx);
+    LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);
+    LZ4_resetStreamHC(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
+    return cdict;
+}
+
+void LZ4F_freeCDict(LZ4F_CDict* cdict)
+{   /* releases the dictionary copy, both pre-loaded streams, then the LZ4F_CDict object itself */
+    if (cdict==NULL) return;  /* support free on NULL */
+    FREEMEM(cdict->dictContent);
+    LZ4_freeStream(cdict->fastCtx);
+    LZ4_freeStreamHC(cdict->HCCtx);
+    FREEMEM(cdict);
+}
+
+
+/*-*********************************
+*  Advanced compression functions
+***********************************/
+
+/*! LZ4F_createCompressionContext() :
+ *  The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ *  This is achieved using LZ4F_createCompressionContext(), which takes as argument a destination pointer and a version.
+ *  The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries.
+ *  The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
+ *  If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation (allocation failure).
+ *  Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOCATOR(sizeof(LZ4F_cctx_t));   /* NOTE(review): remaining fields are never set here - presumably ALLOCATOR returns zeroed memory, verify */
+    if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed);
+
+    cctxPtr->version = version;
+    cctxPtr->cStage = 0;   /* Next stage : init stream */
+
+    *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr;
+
+    return LZ4F_OK_NoError;
+}
+
+
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext)
+{   /* releases the lz4 stream, the internal buffer and the context itself ; always returns LZ4F_OK_NoError */
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext;
+
+    if (cctxPtr != NULL) {  /* support free on NULL */
+       FREEMEM(cctxPtr->lz4CtxPtr);  /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */
+       FREEMEM(cctxPtr->tmpBuff);
+       FREEMEM(LZ4F_compressionContext);
+    }
+
+    return LZ4F_OK_NoError;
+}
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  init streaming compression and writes frame header into dstBuffer ; cdict and preferencesPtr can both be NULL.
+ *  dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_CDict* cdict,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefNull;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* headerStart;
+
+    if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);   /* checked against the largest possible header */
+    memset(&prefNull, 0, sizeof(prefNull));
+    if (preferencesPtr == NULL) preferencesPtr = &prefNull;
+    cctxPtr->prefs = *preferencesPtr;
+
+    /* Ctx Management */
+    {   U32 const tableID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;  /* 0:nothing ; 1:LZ4 table ; 2:HC tables */
+        if (cctxPtr->lz4CtxLevel < tableID) {   /* current context (if any) is not sufficient for the requested level */
+            FREEMEM(cctxPtr->lz4CtxPtr);
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
+            else
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
+            if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->lz4CtxLevel = tableID;
+    }   }
+
+    /* Buffer Management */
+    if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
+        cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+    {   size_t const requiredBuffSize = preferencesPtr->autoFlush ?
+                (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB :  /* only needs windows size */
+                cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB);
+
+        if (cctxPtr->maxBufferSize < requiredBuffSize) {   /* grow the internal buffer ; previous content is discarded */
+            cctxPtr->maxBufferSize = 0;
+            FREEMEM(cctxPtr->tmpBuff);
+            cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize);
+            if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->maxBufferSize = requiredBuffSize;
+    }   }
+    cctxPtr->tmpIn = cctxPtr->tmpBuff;
+    cctxPtr->tmpInSize = 0;
+    XXH32_reset(&(cctxPtr->xxh), 0);
+
+    /* context init */
+    cctxPtr->cdict = cdict;
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+        /* frame init only for blockLinked : blockIndependent will be init at each block */
+        if (cdict) {
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+                memcpy(cctxPtr->lz4CtxPtr, cdict->fastCtx, sizeof(*cdict->fastCtx));   /* start from the stream pre-loaded with the dictionary */
+            } else {
+                memcpy(cctxPtr->lz4CtxPtr, cdict->HCCtx, sizeof(*cdict->HCCtx));
+                LZ4_setCompressionLevel((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+            }
+        } else {
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
+                LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr));
+            else
+                LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel);
+        }
+    }
+
+    /* Magic Number */
+    LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+    dstPtr += 4;
+    headerStart = dstPtr;   /* header checksum covers everything after the magic number */
+
+    /* FLG Byte */
+    *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
+        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
+        + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
+        + ((cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
+        +  (cctxPtr->prefs.frameInfo.dictID > 0) );
+    /* BD Byte */
+    *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+    /* Optional Frame content size field */
+    if (cctxPtr->prefs.frameInfo.contentSize) {
+        LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize);
+        dstPtr += 8;
+        cctxPtr->totalInSize = 0;   /* the counter is only reset (and later verified) when a content size is declared */
+    }
+    /* Optional dictionary ID field */
+    if (cctxPtr->prefs.frameInfo.dictID) {
+        LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID);
+        dstPtr += 4;
+    }
+    /* Header CRC Byte */
+    *dstPtr = LZ4F_headerChecksum(headerStart, dstPtr - headerStart);
+    dstPtr++;
+
+    cctxPtr->cStage = 1;   /* header written, now request input data block */
+    return (dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressBegin() :
+ *  init streaming compression, without dictionary, and writes frame header into dstBuffer.
+ *  dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ *  preferencesPtr can be NULL, in which case default parameters are selected.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                         NULL, preferencesPtr);   /* NULL : no dictionary */
+}
+
+
+/* LZ4F_compressBound() :
+ *      @ return size of Dst buffer given a srcSize to handle worst case situations.
+ *      The LZ4F_preferences_t structure is optional : if NULL, preferences will be set to cover worst case situations.
+ *      The internal buffer is assumed to be as full as possible (blockSize-1 bytes already buffered).
+ */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    return LZ4F_compressBound_internal(srcSize, preferencesPtr, (size_t)-1);   /* (size_t)-1 : gets capped to blockSize-1 */
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict);   /* common signature of the 4 block compressors below ; a result of 0 is treated as failure by LZ4F_makeBlock() */
+
+
+/*! LZ4F_makeBlock():
+ *  compress a single block, add header and checksum ; the block is stored raw when compression into srcSize-1 bytes fails
+ *  assumption : dst buffer capacity is >= 4 + srcSize, + 4 more when crcFlag is set ; @return : nb of bytes written */
+static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize,
+                             compressFunc_t compress, void* lz4ctx, int level,
+                             const LZ4F_CDict* cdict, LZ4F_blockChecksum_t crcFlag)
+{
+    BYTE* const cSizePtr = (BYTE*)dst;
+    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4),
+                                      (int)(srcSize), (int)(srcSize-1),
+                                      level, cdict);
+    LZ4F_writeLE32(cSizePtr, cSize);
+    if (cSize == 0) {  /* compression failed */
+        cSize = (U32)srcSize;
+        LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);   /* overwrite the header : raw block */
+        memcpy(cSizePtr+4, src, srcSize);
+    }
+    if (crcFlag) {
+        U32 const crc32 = XXH32(cSizePtr+4, cSize, 0);  /* checksum of compressed data */
+        LZ4F_writeLE32(cSizePtr+4+cSize, crc32);
+    }
+    return 4 + cSize + ((U32)crcFlag)*4;
+}
+
+
+static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* fast mode, independent blocks : the state is re-initialized at each block */
+    int const acceleration = (level < -1) ? -level : 1;   /* levels below -1 are turned into an acceleration factor */
+    if (cdict) {
+        memcpy(ctx, cdict->fastCtx, sizeof(*cdict->fastCtx));   /* restart from the stream pre-loaded with the dictionary */
+        return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
+    }
+    return LZ4_compress_fast_extState(ctx, src, dst, srcSize, dstCapacity, acceleration);
+}
+
+static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* fast mode, linked blocks : keeps using the stream state left by the previous block */
+    int const acceleration = (level < -1) ? -level : 1;
+    (void)cdict; /* init once at beginning of frame */
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
+}
+
+static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* HC mode, independent blocks : the state is re-initialized at each block */
+    if (cdict) {
+        memcpy(ctx, cdict->HCCtx, sizeof(*cdict->HCCtx));   /* restart from the HC stream pre-loaded with the dictionary */
+        LZ4_setCompressionLevel((LZ4_streamHC_t*)ctx, level);   /* the copied stream carries the level chosen at CDict creation */
+        return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+    }
+    return LZ4_compress_HC_extStateHC(ctx, src, dst, srcSize, dstCapacity, level);
+}
+
+static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* HC mode, linked blocks : keeps using the stream state left by the previous block */
+    (void)level; (void)cdict; /* init once at beginning of frame */
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+}
+
+static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level)
+{   /* picks one of the 4 block compressors : { fast, HC } x { independent, linked } */
+    if (level < LZ4HC_CLEVEL_MIN) {   /* fast mode */
+        if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlock;
+        return LZ4F_compressBlock_continue;
+    }
+    if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlockHC;
+    return LZ4F_compressBlockHC_continue;
+}
+
+static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
+{   /* saves up to 64 KB of dictionary at the start of tmpBuff ; @return : result of LZ4_saveDict() / LZ4_saveDictHC() */
+    if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
+        return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+    return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus;   /* where the last compressed block of an update call was read from */
+
+/*! LZ4F_compressUpdate() :
+ *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ *  dstCapacity MUST be >= LZ4F_compressBound(srcSize, preferencesPtr), and LZ4F_compressBegin() must have been called first.
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
+                           void* dstBuffer, size_t dstCapacity,
+                     const void* srcBuffer, size_t srcSize,
+                     const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_compressOptions_t cOptionsNull;
+    size_t const blockSize = cctxPtr->maxBlockSize;
+    const BYTE* srcPtr = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcPtr + srcSize;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    LZ4F_lastBlockStatus lastBlockCompressed = notDone;
+    compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+
+    if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);   /* frame header not written yet */
+    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    memset(&cOptionsNull, 0, sizeof(cOptionsNull));
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
+
+    /* complete tmp buffer */
+    if (cctxPtr->tmpInSize > 0) {   /* some data already within tmp buffer */
+        size_t const sizeToCopy = blockSize - cctxPtr->tmpInSize;
+        if (sizeToCopy > srcSize) {
+            /* add src to tmpIn buffer */
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+            srcPtr = srcEnd;
+            cctxPtr->tmpInSize += srcSize;
+            /* content checksum is updated at the end of this function */
+        } else {
+            /* complete tmpIn block and then compress it */
+            lastBlockCompressed = fromTmpBuffer;
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+            srcPtr += sizeToCopy;
+
+            dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, blockSize,
+                                     compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                     cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+
+            if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;   /* keep the block in place : it serves as history for the next one */
+            cctxPtr->tmpInSize = 0;
+        }
+    }
+
+    while ((size_t)(srcEnd - srcPtr) >= blockSize) {
+        /* compress full blocks, directly from the source buffer */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, blockSize,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr += blockSize;
+    }
+
+    if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
+        /* compress remaining input < blockSize */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, srcEnd - srcPtr,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr  = srcEnd;
+    }
+
+    /* preserve dictionary if necessary */
+    if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) {
+        if (compressOptionsPtr->stableSrc) {
+            cctxPtr->tmpIn = cctxPtr->tmpBuff;   /* source stays valid : no copy of the history is needed */
+        } else {
+            int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+            if (realDictSize==0) return err0r(LZ4F_ERROR_GENERIC);
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        }
+    }
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)   /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */
+        && !(cctxPtr->prefs.autoFlush))
+    {
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+    }
+
+    /* some input data left, necessarily < blockSize */
+    if (srcPtr < srcEnd) {
+        /* fill tmp buffer */
+        size_t const sizeToCopy = srcEnd - srcPtr;
+        memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+        cctxPtr->tmpInSize = sizeToCopy;
+    }
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+        XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);   /* covers the whole input, buffered part included */
+
+    cctxPtr->totalInSize += srcSize;
+    return dstPtr - dstStart;
+}
+
+
+/*! LZ4F_flush() :
+ *  Should you need to create compressed data immediately, without waiting for a block to be filled,
+ *  you can call LZ4F_flush(), which will immediately compress any remaining data stored within compressionContext.
+ *  The result of the function is the number of bytes written into dstBuffer
+ *  (it can be zero, this means there was no data left within compressionContext)
+ *  dstCapacity must be >= buffered size + 4 (block header) + 4 more when block checksum is enabled,
+ *  otherwise an error code is returned (can be tested using LZ4F_isError()) ; compressOptionsPtr is optional and unused.
+ */
+size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    compressFunc_t compress;
+
+    if (cctxPtr->tmpInSize == 0) return 0;   /* nothing to flush */
+    if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
+    if (dstCapacity < (cctxPtr->tmpInSize + 4 + 4 * (size_t)(cctxPtr->prefs.frameInfo.blockChecksumFlag))) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);   /* worst case written by LZ4F_makeBlock() : block header(4) + raw block + optional block checksum(4) */
+    (void)compressOptionsPtr;   /* not yet useful */
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* compress tmp buffer */
+    dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize,
+                             compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                             cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+    if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize;   /* keep flushed data in place : it serves as history for the next block */
+    cctxPtr->tmpInSize = 0;
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) {  /* necessarily LZ4F_blockLinked */
+        int realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+    }
+
+    return dstPtr - dstStart;
+}
+
+
+/*! LZ4F_compressEnd() :
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4F_flush())
+ * but also properly finalize the frame, with an endMark and a checksum.
+ * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError()),
+ * notably when dstMaxSize cannot hold the flushed block + endMark (4) + optional content checksum (4).
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. The context can then be reused, starting with LZ4F_compressBegin().
+ */
+size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+
+    size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstMaxSize, compressOptionsPtr);
+    if (LZ4F_isError(flushSize)) return flushSize;
+    dstPtr += flushSize;
+    {   size_t const tailSize = 4 + 4 * (size_t)(cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled);   /* endMark + optional content checksum */
+        if (flushSize + tailSize > dstMaxSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); }   /* refuse to write past dstBuffer */
+    LZ4F_writeLE32(dstPtr, 0);
+    dstPtr+=4;   /* endMark */
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
+        U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
+        LZ4F_writeLE32(dstPtr, xxh);
+        dstPtr+=4;   /* content Checksum */
+    }
+
+    cctxPtr->cStage = 0;   /* state is now re-usable (with identical preferences) */
+    cctxPtr->maxBufferSize = 0;  /* reuse HC context */
+
+    if (cctxPtr->prefs.frameInfo.contentSize   /* a declared content size must match what was actually consumed */
+        && cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize)
+        return err0r(LZ4F_ERROR_frameSize_wrong);
+
+    return dstPtr - dstStart;
+}
+
+
+/*-***************************************************
+*   Frame Decompression
+*****************************************************/
+
+typedef enum {   /* decoder state ; dstage_getFrameHeader (0) is the state set by LZ4F_resetDecompressionContext() */
+    dstage_getFrameHeader=0, dstage_storeFrameHeader,
+    dstage_init,
+    dstage_getBlockHeader, dstage_storeBlockHeader,
+    dstage_copyDirect, dstage_getBlockChecksum,
+    dstage_getCBlock, dstage_storeCBlock,
+    dstage_decodeCBlock, dstage_decodeCBlock_intoDst,
+    dstage_decodeCBlock_intoTmp, dstage_flushOut,
+    dstage_getSuffix, dstage_storeSuffix,
+    dstage_getSFrameSize, dstage_storeSFrameSize,
+    dstage_skipSkippable
+} dStage_t;
+
+struct LZ4F_dctx_s {
+    LZ4F_frameInfo_t frameInfo;
+    U32    version;             /* value given at context creation */
+    dStage_t dStage;            /* current decoder state ; also reported by LZ4F_freeDecompressionContext() */
+    U64    frameRemainingSize;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    BYTE*  tmpIn;               /* heap buffer, released by LZ4F_freeDecompressionContext() */
+    size_t tmpInSize;
+    size_t tmpInTarget;
+    BYTE*  tmpOutBuffer;        /* heap buffer, released by LZ4F_freeDecompressionContext() */
+    const BYTE* dict;           /* cleared by LZ4F_resetDecompressionContext() */
+    size_t dictSize;            /* cleared by LZ4F_resetDecompressionContext() */
+    BYTE*  tmpOut;              /* never freed on its own - presumably points inside tmpOutBuffer, verify */
+    size_t tmpOutSize;
+    size_t tmpOutStart;
+    XXH32_state_t xxh;
+    XXH32_state_t blockChecksum;
+    BYTE   header[LZ4F_HEADER_SIZE_MAX];   /* large enough for the biggest frame header */
+};  /* typedef'd to LZ4F_dctx in lz4frame.h */
+
+
+/*! LZ4F_createDecompressionContext() :
+ *  Create a decompressionContext object, which will track all decompression operations.
+ *  Provides a pointer to an allocated LZ4F_decompressionContext object ; on failure, the pointer is set to NULL.
+ *  Object can later be released using LZ4F_freeDecompressionContext().
+ * @return : if != 0, there was an error during context creation (allocation_failed).
+ */
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOCATOR(sizeof(LZ4F_dctx));
+    if (dctx==NULL) { *LZ4F_decompressionContextPtr = NULL; return err0r(LZ4F_ERROR_allocation_failed); }   /* same error code as LZ4F_createCompressionContext() */
+
+    dctx->version = versionNumber;
+    *LZ4F_decompressionContextPtr = dctx;
+    return LZ4F_OK_NoError;
+}
+
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx)
+{
+    LZ4F_errorCode_t stageAtRelease;
+    if (dctx == NULL) return LZ4F_OK_NoError;   /* NULL is accepted, like free() */
+    /* the value returned is the decoding stage the context was in when released */
+    stageAtRelease = (LZ4F_errorCode_t)dctx->dStage;
+    FREEMEM(dctx->tmpIn);
+    FREEMEM(dctx->tmpOutBuffer);
+    FREEMEM(dctx);
+    return stageAtRelease;
+}
+
+
+/*==---   Streaming Decompression operations   ---==*/
+
+void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
+{
+    dctx->dictSize = 0;                     /* forget history / dictionary ... */
+    dctx->dict = NULL;
+    dctx->dStage = dstage_getFrameHeader;   /* ... and expect a new frame header next */
+}
+
+
+/*! LZ4F_headerSize() :
+ *  Needs the 4-byte magic number plus the FLG byte, hence at least 5 bytes at `src`.
+ *  @return : size of the frame header (8 for a skippable frame),
+ *            or an error code, which can be tested using LZ4F_isError()
+ */
+static size_t LZ4F_headerSize(const void* src, size_t srcSize)
+{
+    const BYTE* const hdr = (const BYTE*)src;
+    U32 magic;
+    if (srcSize < 5) return err0r(LZ4F_ERROR_frameHeader_incomplete);   /* FLG byte not reachable */
+    magic = LZ4F_readLE32(hdr);
+
+    /* skippable frames : fixed 8-byte header (magic number + size field) */
+    if ((magic & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) return 8;
+    /* anything else must carry the regular frame magic number */
+    if (magic != LZ4F_MAGICNUMBER) return err0r(LZ4F_ERROR_frameType_unknown);
+
+    /* optional fields announced by FLG extend the minimal header */
+    {   size_t const contentSizeBytes = ((hdr[4]>>3) & _1BIT) ? 8 : 0;
+        size_t const dictIDBytes = (hdr[4] & _1BIT) ? 4 : 0;
+        return minFHSize + contentSizeBytes + dictIDBytes;
+    }
+}
+
+
+/*! LZ4F_decodeHeader() :
+ *  input   : `src` points at the **beginning of the frame**
+ *  output  : set internal values of dctx, such as
+ *            dctx->frameInfo and dctx->dStage.
+ *            Buffers are not allocated here, but by the dstage_init step of LZ4F_decompress().
+ *  @return : nb Bytes read from src (necessarily <= srcSize)
+ *            or an error code (testable with LZ4F_isError())
+ */
+static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize)
+{
+    unsigned blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictIDFlag, blockSizeID;
+    size_t frameHeaderSize;
+    const BYTE* srcPtr = (const BYTE*)src;
+
+    /* need to decode header to get frameInfo */
+    if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);   /* minimal frame header size */
+    memset(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
+
+    /* special case : skippable frames */
+    if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
+        dctx->frameInfo.frameType = LZ4F_skippableFrame;
+        if (src == (void*)(dctx->header)) {   /* input already staged in dctx->header : keep filling it */
+            dctx->tmpInSize = srcSize;
+            dctx->tmpInTarget = 8;
+            dctx->dStage = dstage_storeSFrameSize;
+            return srcSize;
+        } else {   /* reading straight from caller's buffer : only the magic number is consumed */
+            dctx->dStage = dstage_getSFrameSize;
+            return 4;
+        }
+    }
+
+    /* control magic number */
+    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER)
+        return err0r(LZ4F_ERROR_frameType_unknown);
+    dctx->frameInfo.frameType = LZ4F_frame;
+
+    /* Flags */
+    {   U32 const FLG = srcPtr[4];
+        U32 const version = (FLG>>6) & _2BITS;
+        blockChecksumFlag = (FLG>>4) & _1BIT;
+        blockMode = (FLG>>5) & _1BIT;
+        contentSizeFlag = (FLG>>3) & _1BIT;
+        contentChecksumFlag = (FLG>>2) & _1BIT;
+        dictIDFlag = FLG & _1BIT;
+        /* validate */
+        if (((FLG>>1)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */
+        if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong);        /* Version Number, only supported value */
+    }
+
+    /* Frame Header Size */
+    frameHeaderSize = minFHSize + (contentSizeFlag*8) + (dictIDFlag*4);
+
+    if (srcSize < frameHeaderSize) {
+        /* not enough input to fully decode frame header */
+        if (srcPtr != dctx->header)
+            memcpy(dctx->header, srcPtr, srcSize);
+        dctx->tmpInSize = srcSize;
+        dctx->tmpInTarget = frameHeaderSize;
+        dctx->dStage = dstage_storeFrameHeader;
+        return srcSize;
+    }
+
+    {   U32 const BD = srcPtr[5];
+        blockSizeID = (BD>>4) & _3BITS;
+        /* validate */
+        if (((BD>>7)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);   /* Reserved bit */
+        if (blockSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid);    /* 4-7 only supported values for the time being */
+        if (((BD>>0)&_4BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set);  /* Reserved bits */
+    }
+
+    /* check header */
+    {   BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
+        if (HC != srcPtr[frameHeaderSize-1])
+            return err0r(LZ4F_ERROR_headerChecksum_invalid);
+    }
+
+    /* save */
+    dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
+    dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)blockChecksumFlag;
+    dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
+    dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
+    dctx->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
+    dctx->frameRemainingSize = 0;   /* drop any count left by an aborted frame : it is tested at frame end */
+    if (contentSizeFlag)
+        dctx->frameRemainingSize = dctx->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
+    if (dictIDFlag)
+        dctx->frameInfo.dictID = LZ4F_readLE32(srcPtr + frameHeaderSize - 5);
+
+    dctx->dStage = dstage_init;
+
+    return frameHeaderSize;
+}
+
+
+/*! LZ4F_getFrameInfo() :
+ *  Copies the parameters of the current frame (max blockSize, checksum flags, etc.)
+ *  into *frameInfoPtr, an LZ4F_frameInfo_t structure which must already be allocated by the caller.
+ *  Usage is optional. Objective is to provide relevant information for allocation purposes.
+ *  Behavior depends on the state of dctx :
+ *   - No frame started yet : the frame header is decoded from `srcBuffer`, which starts the decoding process.
+ *     *srcSizePtr must cover the complete header, else the call fails with LZ4F_ERROR_frameHeader_incomplete.
+ *     It's possible to provide more input data than the header alone.
+ *   - Frame header only partially received by LZ4F_decompress() :
+ *     the call fails with LZ4F_ERROR_frameDecoding_alreadyStarted.
+ *   - Frame header already decoded : no input is read, frame parameters are extracted from dctx.
+ *  The number of bytes consumed from srcBuffer is written into *srcSizePtr
+ *  (necessarily <= original value; 0 whenever an error is returned).
+ *  Decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError()
+ *  note : when decoding the header fails, *frameInfoPtr still receives a copy of dctx->frameInfo.
+ */
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoPtr,
+                                   const void* srcBuffer, size_t* srcSizePtr)
+{
+    size_t const srcAvailable = *srcSizePtr;
+    *srcSizePtr = 0;   /* default : nothing consumed */
+
+    if (dctx->dStage > dstage_storeFrameHeader) {   /* assumption : header stages come first in dStage_t */
+        /* header already decoded : report stored parameters, then ask the decoder for its hint */
+        size_t noOutput = 0, noInput = 0;
+        *frameInfoPtr = dctx->frameInfo;
+        return LZ4F_decompress(dctx, NULL, &noOutput, NULL, &noInput, NULL);
+    }
+
+    /* frame decoding already started, in the middle of header => automatic fail */
+    if (dctx->dStage == dstage_storeFrameHeader)
+        return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted);
+
+    /* new frame : the header must be entirely available within srcBuffer */
+    {   size_t const hSize = LZ4F_headerSize(srcBuffer, srcAvailable);
+        size_t decodeResult;
+        if (LZ4F_isError(hSize)) return hSize;
+        if (srcAvailable < hSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);
+
+        decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
+        *frameInfoPtr = dctx->frameInfo;
+        if (LZ4F_isError(decodeResult)) return decodeResult;
+
+        /* header accepted : report bytes consumed; next expected input is a block header */
+        *srcSizePtr = decodeResult;
+        return BHSize;   /* block header size */
+    }
+}
+
+
+/* LZ4F_updateDict() : only used for LZ4F_blockLinked mode.
+ * Records the `dstSize` bytes just written at `dstPtr` as history (dctx->dict, dctx->dictSize); `dstPtr0` is the start of the caller's dst buffer, `withinTmp` is set when those bytes were flushed from dctx->tmpOut. */
+static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+{
+    if (dctx->dictSize==0)
+        dctx->dict = (const BYTE*)dstPtr;   /* no history yet : start it at dstPtr, so that the continuity test below succeeds */
+
+    if (dctx->dict + dctx->dictSize == dstPtr) {  /* dictionary continuity : new bytes directly follow the history */
+        dctx->dictSize += dstSize;
+        return;
+    }
+
+    if (dstPtr - dstPtr0 + dstSize >= 64 KB) {  /* dstBuffer holds at least 64 KB : it becomes the dictionary */
+        dctx->dict = (const BYTE*)dstPtr0;
+        dctx->dictSize = dstPtr - dstPtr0 + dstSize;
+        return;
+    }
+
+    if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {   /* history and flushed bytes both live in tmpOutBuffer */
+        /* assumption : dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart */
+        dctx->dictSize += dstSize;
+        return;
+    }
+
+    if (withinTmp) { /* history is elsewhere : copy its relevant portion in front of tmpOut within tmpOutBuffer */
+        size_t const preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;   /* room available in front of tmpOut */
+        size_t copySize = 64 KB - dctx->tmpOutSize;
+        const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+        if (dctx->tmpOutSize > 64 KB) copySize = 0;   /* tmpOut alone already covers 64 KB; also avoids the unsigned wrap above */
+        if (copySize > preserveSize) copySize = preserveSize;
+
+        memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize;
+        return;
+    }
+
+    if (dctx->dict == dctx->tmpOutBuffer) {    /* history in tmp, new bytes in dst : append dst to tmp to complete dict */
+        if (dctx->dictSize + dstSize > dctx->maxBufferSize) {  /* tmp buffer not large enough : keep only the tail */
+            size_t const preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB (larger sizes returned earlier) */
+            memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+            dctx->dictSize = preserveSize;
+        }
+        memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize);
+        dctx->dictSize += dstSize;
+        return;
+    }
+
+    /* remaining case : history is neither contiguous nor in tmp => join dict tail & dest into tmp */
+    {   size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB (larger sizes returned earlier) */
+        if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
+        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+        memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dstSize;
+    }
+}
+
+
+
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate compressed data in srcBuffer.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ *  into dstBuffer of capacity *dstSizePtr.
+ *
+ *  The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ *  The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ *  If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ *  Remaining data will have to be presented again in a subsequent invocation.
+ *
+ *  The function result is a hint of the best srcSize to use for next call to LZ4F_decompress.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ *  Note that this is just a hint, and it's always possible to provide any srcSize value.
+ *  When a frame is fully decoded, @return will be 0.
+ *  If decompression failed, @return is an error code which can be tested using LZ4F_isError().
+ */
+size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    LZ4F_decompressOptions_t optionsNull;
+    const BYTE* const srcStart = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcStart + *srcSizePtr;
+    const BYTE* srcPtr = srcStart;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* const dstEnd = dstStart + *dstSizePtr;
+    BYTE* dstPtr = dstStart;
+    const BYTE* selectedIn = NULL;
+    unsigned doAnotherStage = 1;
+    size_t nextSrcSizeHint = 1;
+
+
+    memset(&optionsNull, 0, sizeof(optionsNull));
+    if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+    *srcSizePtr = 0;
+    *dstSizePtr = 0;
+
+    /* behaves as a state machine */
+
+    while (doAnotherStage) {
+
+        switch(dctx->dStage)
+        {
+
+        case dstage_getFrameHeader:
+            if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
+                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, srcEnd-srcPtr);  /* will update dStage appropriately */
+                if (LZ4F_isError(hSize)) return hSize;
+                srcPtr += hSize;
+                break;
+            }
+            dctx->tmpInSize = 0;
+            if (srcEnd-srcPtr == 0) return minFHSize;   /* 0-size input */
+            dctx->tmpInTarget = minFHSize;   /* minimum to attempt decode */
+            dctx->dStage = dstage_storeFrameHeader;
+            /* fall-through */
+
+        case dstage_storeFrameHeader:
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) {
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
+                    doAnotherStage = 0;   /* not enough src data, ask for some more */
+                    break;
+                }
+                {   size_t const hSize = LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget);  /* will update dStage appropriately */
+                    if (LZ4F_isError(hSize)) return hSize;
+                }
+                break;
+            }
+
+        case dstage_init:
+            if (dctx->frameInfo.contentChecksumFlag) XXH32_reset(&(dctx->xxh), 0);
+            /* internal buffers allocation */
+            {   size_t const bufferNeeded = dctx->maxBlockSize + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB) + 4 /* block checksum */;
+                if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
+                    dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
+                    FREEMEM(dctx->tmpIn);
+                    dctx->tmpIn = (BYTE*)ALLOCATOR(dctx->maxBlockSize + 4 /* block checksum, staged with the block by dstage_storeCBlock */);
+                    if (dctx->tmpIn == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+                    FREEMEM(dctx->tmpOutBuffer);
+                    dctx->tmpOutBuffer= (BYTE*)ALLOCATOR(bufferNeeded);
+                    if (dctx->tmpOutBuffer== NULL) return err0r(LZ4F_ERROR_allocation_failed);
+                    dctx->maxBufferSize = bufferNeeded;
+            }   }
+            dctx->tmpInSize = 0;
+            dctx->tmpInTarget = 0;
+            dctx->tmpOut = dctx->tmpOutBuffer;
+            dctx->tmpOutStart = 0;
+            dctx->tmpOutSize = 0;
+
+            dctx->dStage = dstage_getBlockHeader;
+            /* fall-through */
+
+        case dstage_getBlockHeader:
+            if ((size_t)(srcEnd - srcPtr) >= BHSize) {
+                selectedIn = srcPtr;
+                srcPtr += BHSize;
+            } else {
+                /* not enough input to read cBlockSize field */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeBlockHeader;
+            }
+
+            if (dctx->dStage == dstage_storeBlockHeader)   /* can be skipped */
+        case dstage_storeBlockHeader:
+            {   size_t sizeToCopy = BHSize - dctx->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < BHSize) {   /* not enough input for cBlockSize */
+                    nextSrcSizeHint = BHSize - dctx->tmpInSize;
+                    doAnotherStage  = 0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }
+
+        /* decode block header */
+            {   size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
+                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * 4;
+                if (nextCBlockSize==0) {  /* frameEnd signal, no more block */
+                    dctx->dStage = dstage_getSuffix;
+                    break;
+                }
+                if (nextCBlockSize > dctx->maxBlockSize)
+                    return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+                if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+                    /* next block is uncompressed */
+                    dctx->tmpInTarget = nextCBlockSize;
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        XXH32_reset(&dctx->blockChecksum, 0);
+                    }
+                    dctx->dStage = dstage_copyDirect;
+                    break;
+                }
+                /* next block is a compressed block */
+                dctx->tmpInTarget = nextCBlockSize + crcSize;
+                dctx->dStage = dstage_getCBlock;
+                if (dstPtr==dstEnd) {
+                    nextSrcSizeHint = nextCBlockSize + crcSize + BHSize;
+                    doAnotherStage = 0;
+                }
+                break;
+            }
+
+        case dstage_copyDirect:   /* uncompressed block */
+            {   size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+                size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
+                memcpy(dstPtr, srcPtr, sizeToCopy);
+                if (dctx->frameInfo.blockChecksumFlag) {
+                    XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                }
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= sizeToCopy;
+
+                /* history management (linked blocks only)*/
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+
+                srcPtr += sizeToCopy;
+                dstPtr += sizeToCopy;
+                if (sizeToCopy == dctx->tmpInTarget) {   /* all done */
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        dctx->tmpInSize = 0;
+                        dctx->dStage = dstage_getBlockChecksum;
+                    } else
+                        dctx->dStage = dstage_getBlockHeader;  /* new block */
+                    break;
+                }
+                dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
+                nextSrcSizeHint = dctx->tmpInTarget
+                                + dctx->frameInfo.blockChecksumFlag * 4  /* block checksum */
+                                + BHSize /* next header size */;
+                doAnotherStage = 0;
+                break;
+            }
+
+        /* check block checksum for recently transferred uncompressed block */
+        case dstage_getBlockChecksum:
+            {   const void* crcSrc;
+                if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) {
+                    crcSrc = srcPtr;
+                    srcPtr += 4;
+                } else {
+                    size_t const stillToCopy = 4 - dctx->tmpInSize;
+                    size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr));
+                    memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                    dctx->tmpInSize += sizeToCopy;
+                    srcPtr += sizeToCopy;
+                    if (dctx->tmpInSize < 4) {  /* all input consumed */
+                        doAnotherStage = 0;
+                        break;
+                    }
+                    crcSrc = dctx->header;
+                }
+                {   U32 const readCRC = LZ4F_readLE32(crcSrc);
+                    U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+                    if (readCRC != calcCRC)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+                }
+            }
+            dctx->dStage = dstage_getBlockHeader;  /* new block */
+            break;
+
+        case dstage_getCBlock:   /* entry from block header decoding */
+            if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) {
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeCBlock;
+                break;
+            }
+            /* input large enough to read full block directly */
+            selectedIn = srcPtr;
+            srcPtr += dctx->tmpInTarget;
+            dctx->dStage = dstage_decodeCBlock;
+            break;
+
+        case dstage_storeCBlock:
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd-srcPtr));
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+                dctx->dStage = dstage_decodeCBlock;
+            }
+            /* fall-through */
+
+        /* At this stage, input is large enough to decode a block */
+        case dstage_decodeCBlock:
+            if (dctx->frameInfo.blockChecksumFlag) {
+                dctx->tmpInTarget -= 4;
+                {   U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
+                    U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+                    if (readBlockCrc != calcBlockCrc)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+            }   }
+            if ((size_t)(dstEnd-dstPtr) < dctx->maxBlockSize)   /* not enough place into dst : decode into tmpOut */
+                dctx->dStage = dstage_decodeCBlock_intoTmp;
+            else
+                dctx->dStage = dstage_decodeCBlock_intoDst;
+            break;
+
+        case dstage_decodeCBlock_intoDst:
+            {   int const decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dstPtr,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        (const char*)dctx->dict, (int)dctx->dictSize);
+                if (decodedSize < 0) return err0r(LZ4F_ERROR_decompressionFailed);   /* same code as the intoTmp path */
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dstPtr, decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= decodedSize;
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, decodedSize, dstStart, 0);
+
+                dstPtr += decodedSize;
+                dctx->dStage = dstage_getBlockHeader;
+                break;
+            }
+
+        case dstage_decodeCBlock_intoTmp:
+            /* not enough place into dst : decode into tmpOut */
+
+            /* ensure enough place for tmpOut */
+            if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                if (dctx->dict == dctx->tmpOutBuffer) {
+                    if (dctx->dictSize > 128 KB) {
+                        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB);
+                        dctx->dictSize = 64 KB;
+                    }
+                    dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize;
+                } else {  /* dict not within tmp */
+                    size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
+                    dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
+                }
+            }
+
+            /* Decode block */
+            {   int const decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dctx->tmpOut,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        (const char*)dctx->dict, (int)dctx->dictSize);
+                if (decodedSize < 0)  /* decompression failed */
+                    return err0r(LZ4F_ERROR_decompressionFailed);
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dctx->tmpOut, decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= decodedSize;
+                dctx->tmpOutSize = decodedSize;
+                dctx->tmpOutStart = 0;
+                dctx->dStage = dstage_flushOut;
+            }
+            /* fall-through */
+
+        case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
+            {   size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+                memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1);
+
+                dctx->tmpOutStart += sizeToCopy;
+                dstPtr += sizeToCopy;
+
+                if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+                    dctx->dStage = dstage_getBlockHeader;  /* get next block */
+                    break;
+                }
+                nextSrcSizeHint = BHSize;
+                doAnotherStage = 0;   /* still some data to flush */
+                break;
+            }
+
+        case dstage_getSuffix:
+            {   size_t const suffixSize = dctx->frameInfo.contentChecksumFlag * 4;
+                if (dctx->frameRemainingSize)
+                    return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
+                if (suffixSize == 0) {  /* frame completed */
+                    nextSrcSizeHint = 0;
+                    LZ4F_resetDecompressionContext(dctx);
+                    doAnotherStage = 0;
+                    break;
+                }
+                if ((srcEnd - srcPtr) < 4) {  /* not enough size for entire CRC */
+                    dctx->tmpInSize = 0;
+                    dctx->dStage = dstage_storeSuffix;
+                } else {
+                    selectedIn = srcPtr;
+                    srcPtr += 4;
+                }
+            }
+
+            if (dctx->dStage == dstage_storeSuffix)   /* can be skipped */
+        case dstage_storeSuffix:
+            {
+                size_t sizeToCopy = 4 - dctx->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */
+                    nextSrcSizeHint = 4 - dctx->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }
+
+        /* case dstage_checkSuffix: */   /* no direct call, avoid scan-build warning */
+            {   U32 const readCRC = LZ4F_readLE32(selectedIn);
+                U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+                if (readCRC != resultCRC)
+                    return err0r(LZ4F_ERROR_contentChecksum_invalid);
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getSFrameSize:
+            if ((srcEnd - srcPtr) >= 4) {
+                selectedIn = srcPtr;
+                srcPtr += 4;
+            } else {
+                /* not enough input to read cBlockSize field */
+                dctx->tmpInSize = 4;
+                dctx->tmpInTarget = 8;
+                dctx->dStage = dstage_storeSFrameSize;
+            }
+
+            if (dctx->dStage == dstage_storeSFrameSize)
+        case dstage_storeSFrameSize:
+            {
+                size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+                                             (size_t)(srcEnd - srcPtr) );
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) {
+                    /* not enough input to get full sBlockSize; wait for more */
+                    nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctx->header + 4;
+            }
+
+        /* case dstage_decodeSFrameSize: */   /* no direct access */
+            {   size_t const SFrameSize = LZ4F_readLE32(selectedIn);
+                dctx->frameInfo.contentSize = SFrameSize;
+                dctx->tmpInTarget = SFrameSize;
+                dctx->dStage = dstage_skipSkippable;
+                break;
+            }
+
+        case dstage_skipSkippable:
+            {   size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr));
+                srcPtr += skipSize;
+                dctx->tmpInTarget -= skipSize;
+                doAnotherStage = 0;
+                nextSrcSizeHint = dctx->tmpInTarget;
+                if (nextSrcSizeHint) break;  /* still more to skip */
+                LZ4F_resetDecompressionContext(dctx);
+                break;
+            }
+        }
+    }
+
+    /* preserve history within tmp if necessary */
+    if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked)
+      && (dctx->dict != dctx->tmpOutBuffer)
+      && (dctx->dStage != dstage_getFrameHeader)
+      && (!decompressOptionsPtr->stableDst)
+      && ((unsigned)(dctx->dStage-1) < (unsigned)(dstage_getSuffix-1)) )
+    {
+        if (dctx->dStage == dstage_flushOut) {
+            size_t preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;
+            size_t copySize = 64 KB - dctx->tmpOutSize;
+            const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+            if (dctx->tmpOutSize > 64 KB) copySize = 0;
+            if (copySize > preserveSize) copySize = preserveSize;
+
+            memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = preserveSize + dctx->tmpOutStart;
+        } else {
+            size_t newDictSize = dctx->dictSize;
+            const BYTE* oldDictEnd = dctx->dict + dctx->dictSize;
+            if ((newDictSize) > 64 KB) newDictSize = 64 KB;
+
+            memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = newDictSize;
+            dctx->tmpOut = dctx->tmpOutBuffer + newDictSize;
+        }
+    }
+
+    *srcSizePtr = (srcPtr - srcStart);
+    *dstSizePtr = (dstPtr - dstStart);
+    return nextSrcSizeHint;
+}
+
+/*! LZ4F_decompress_usingDict() :
+ *  Variant of LZ4F_decompress() which decodes with the help of a caller-provided dictionary.
+ *  The dictionary is only referenced (pointer + size are stored into dctx), not copied here,
+ *  so it must remain accessible throughout the entire frame decoding.
+ */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    int const blocksNotStarted = (dctx->dStage <= dstage_init);
+
+    if (blocksNotStarted) {   /* once past dstage_init, the dictionary arguments are ignored */
+        dctx->dictSize = dictSize;
+        dctx->dict = (const BYTE*)dict;
+    }
+    return LZ4F_decompress(dctx, dstBuffer, dstSizePtr, srcBuffer, srcSizePtr, decompressOptionsPtr);
+}
diff --git a/lz4/lz4frame.h b/lz4/lz4frame.h
new file mode 100644
index 0000000..88a6513
--- /dev/null
+++ b/lz4/lz4frame.h
@@ -0,0 +1,391 @@
+/*
+   LZ4 auto-framing library
+   Header File
+   Copyright (C) 2011-2017, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed frames
+ * conformant with specification v1.5.1.
+ * It also offers streaming capabilities.
+ * lz4.h is not required when using lz4frame.h.
+ * */
+
+#ifndef LZ4F_H_09782039843
+#define LZ4F_H_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ---   Dependency   --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md).
+  lz4frame.h provides frame compression functions that take care
+  of encoding standard metadata alongside LZ4-compressed blocks.
+*/
+
+/*-***************************************************************
+ *  Compiler specifics
+ *****************************************************************/
+/*  LZ4_DLL_EXPORT :
+ *  Enable exporting of functions when building a Windows DLL
+ *  LZ4FLIB_API :
+ *  Control library symbols visibility.
+ */
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4FLIB_API __declspec(dllexport)
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4FLIB_API __declspec(dllimport)
+#elif defined(__GNUC__) && (__GNUC__ >= 4)
+#  define LZ4FLIB_API __attribute__ ((__visibility__ ("default")))
+#else
+#  define LZ4FLIB_API
+#endif
+
+#ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4F_DEPRECATE(x) x
+#else
+#  if defined(_MSC_VER)
+#    define LZ4F_DEPRECATE(x) x   /* __declspec(deprecated) x - only works with C++ */
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
+#    define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
+#  else
+#    define LZ4F_DEPRECATE(x) x   /* no deprecation warning for this compiler */
+#  endif
+#endif
+
+
+/*-************************************
+ *  Error management
+ **************************************/
+typedef size_t LZ4F_errorCode_t;
+
+LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);   /**< tells if a `LZ4F_errorCode_t` function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /**< return error code string; useful for debugging */
+
+
+/*-************************************
+ *  Frame compression types
+ **************************************/
+/* #define LZ4F_ENABLE_OBSOLETE_ENUMS   // uncomment to enable obsolete enums */
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+#  define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
+#else
+#  define LZ4F_OBSOLETE_ENUM(x)
+#endif
+
+/* The larger the block size, the (slightly) better the compression ratio,
+ * though there are diminishing returns.
+ * Larger blocks also increase memory usage on both compression and decompression sides. */
+typedef enum {
+    LZ4F_default=0,
+    LZ4F_max64KB=4,
+    LZ4F_max256KB=5,
+    LZ4F_max1MB=6,
+    LZ4F_max4MB=7
+    LZ4F_OBSOLETE_ENUM(max64KB)
+    LZ4F_OBSOLETE_ENUM(max256KB)
+    LZ4F_OBSOLETE_ENUM(max1MB)
+    LZ4F_OBSOLETE_ENUM(max4MB)
+} LZ4F_blockSizeID_t;
+
+/* Linked blocks sharply reduce inefficiencies when using small blocks,
+ * they compress better.
+ * However, some LZ4 decoders are only compatible with independent blocks */
+typedef enum {
+    LZ4F_blockLinked=0,
+    LZ4F_blockIndependent
+    LZ4F_OBSOLETE_ENUM(blockLinked)
+    LZ4F_OBSOLETE_ENUM(blockIndependent)
+} LZ4F_blockMode_t;
+
+typedef enum {
+    LZ4F_noContentChecksum=0,
+    LZ4F_contentChecksumEnabled
+    LZ4F_OBSOLETE_ENUM(noContentChecksum)
+    LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+
+typedef enum {   /* value type of LZ4F_frameInfo_t.blockChecksumFlag ; 0 == no block checksum */
+    LZ4F_noBlockChecksum=0,
+    LZ4F_blockChecksumEnabled
+} LZ4F_blockChecksum_t;
+
+typedef enum {
+    LZ4F_frame=0,
+    LZ4F_skippableFrame
+    LZ4F_OBSOLETE_ENUM(skippableFrame)
+} LZ4F_frameType_t;
+
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+/*! LZ4F_frameInfo_t :
+ *  makes it possible to set or read frame parameters.
+ *  It's not required to set all fields, as long as the structure was initially memset() to zero.
+ *  For all fields, 0 sets it to default value */
+typedef struct {
+  LZ4F_blockSizeID_t     blockSizeID;          /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
+  LZ4F_blockMode_t       blockMode;            /* LZ4F_blockLinked, LZ4F_blockIndependent ; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag;  /* if enabled, frame is terminated with a 32-bits checksum of decompressed data ; 0 == disabled (default)  */
+  LZ4F_frameType_t       frameType;            /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+  unsigned long long     contentSize;          /* Size of uncompressed content ; 0 == unknown */
+  unsigned               dictID;               /* Dictionary ID, sent by the compressor to help decoder select the correct dictionary; 0 == no dictID provided */
+  LZ4F_blockChecksum_t   blockChecksumFlag;    /* if enabled, each block is followed by a checksum of block's compressed data ; 0 == disabled (default)  */
+} LZ4F_frameInfo_t;
+
+/*! LZ4F_preferences_t :
+ *  makes it possible to supply detailed compression parameters to the stream interface.
+ *  It's not required to set all fields, as long as the structure was initially memset() to zero.
+ *  All reserved fields must be set to zero. */
+typedef struct {
+  LZ4F_frameInfo_t frameInfo;
+  int      compressionLevel;       /* 0 == default (fast mode); values above LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values below 0 trigger "fast acceleration", proportional to value */
+  unsigned autoFlush;              /* 1 == always flush, to reduce usage of internal buffers */
+  unsigned reserved[4];            /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+LZ4FLIB_API int LZ4F_compressionLevel_max(void);
+
+
+/*-*********************************
+*  Simple compression function
+***********************************/
+/*! LZ4F_compressFrameBound() :
+ *  Returns the maximum possible size of a frame compressed with LZ4F_compressFrame() given srcSize content and preferences.
+ *  Note : this result is only usable with LZ4F_compressFrame(), not with multi-segments compression.
+ */
+LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                                const void* srcBuffer, size_t srcSize,
+                                const LZ4F_preferences_t* preferencesPtr);
+
+
+/*-***********************************
+*  Advanced compression functions
+*************************************/
+typedef struct LZ4F_cctx_s LZ4F_cctx;   /* incomplete type */
+typedef LZ4F_cctx* LZ4F_compressionContext_t;   /* for compatibility with previous API version */
+
+typedef struct {
+  unsigned stableSrc;    /* 1 == src content will remain present on future calls to LZ4F_compressUpdate(); skip copying src content within tmp buffer */
+  unsigned reserved[3];
+} LZ4F_compressOptions_t;
+
+/*---   Resource Management   ---*/
+
+#define LZ4F_VERSION 100
+LZ4FLIB_API unsigned LZ4F_getVersion(void);
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
+ * The function will provide a pointer to a fully allocated LZ4F_cctx object.
+ * If @return != zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
+
+
+/*----    Compression    ----*/
+
+#define LZ4F_HEADER_SIZE_MAX 19
+/*! LZ4F_compressBegin() :
+ *  will write the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+                                      void* dstBuffer, size_t dstCapacity,
+                                      const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressBound() :
+ * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
+ * prefsPtr is optional : you can provide NULL as argument, preferences will be set to cover worst case scenario.
+ * Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
+ * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ */
+LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressUpdate() :
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * An important rule is that dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ * This value is provided by LZ4F_compressBound().
+ * If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+ * LZ4F_compressUpdate() doesn't guarantee error recovery. When an error occurs, compression context must be freed or resized.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_flush() :
+ * When data must be generated and sent immediately, without waiting for a block to be completely filled,
+ * it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
+ * `dstCapacity` must be large enough to ensure the operation will be successful.
+ * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
+ * @return : number of bytes written into dstBuffer (it can be zero, which means there was no data stored within cctx)
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_compressEnd() :
+ *  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ *  It will flush whatever data remained within `cctx` (like LZ4F_flush())
+ *  and properly finalize the frame, with an endMark and a checksum.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
+ * @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ */
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
+
+
+/*-*********************************
+*  Decompression functions
+***********************************/
+typedef struct LZ4F_dctx_s LZ4F_dctx;   /* incomplete type */
+typedef LZ4F_dctx* LZ4F_decompressionContext_t;   /* compatibility with previous API versions */
+
+typedef struct {
+  unsigned stableDst;    /* pledge that at least 64KB+64Bytes of previously decompressed data remain unmodified where it was decoded. This optimization skips storage operations in tmp buffers */
+  unsigned reserved[3];  /* must be set to zero for forward compatibility */
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+/*!LZ4F_createDecompressionContext() :
+ * Create an LZ4F_dctx object, to track all decompression operations.
+ * The version provided MUST be LZ4F_VERSION.
+ * The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+ * The result is an errorCode, which can be tested using LZ4F_isError().
+ * dctx memory can be released using LZ4F_freeDecompressionContext();
+ * The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
+ * That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
+
+
+/*-***********************************
+*  Streaming decompression functions
+*************************************/
+
+/*! LZ4F_getFrameInfo() :
+ *  This function extracts frame parameters (max blockSize, dictID, etc.).
+ *  Its usage is optional.
+ *  Extracted information is typically useful for allocation and dictionary.
+ *  This function works in 2 situations :
+ *   - At the beginning of a new frame, in which case
+ *     it will decode information from `srcBuffer`, starting the decoding process.
+ *     Input size must be large enough to successfully decode the entire frame header.
+ *     Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
+ *     It's allowed to provide more input data than this minimum.
+ *   - After decoding has been started.
+ *     In which case, no input is read, frame parameters are extracted from dctx.
+ *   Exception : if decoding has barely started, but not yet extracted information from header,
+ *     LZ4F_getFrameInfo() will fail.
+ *  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ *  Decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError().
+ *  note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+ *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                     LZ4F_frameInfo_t* frameInfoPtr,
+                                     const void* srcBuffer, size_t* srcSizePtr);
+
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate data compressed in `srcBuffer`.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer, into dstBuffer of capacity *dstSizePtr.
+ *
+ *  The number of bytes regenerated into dstBuffer is provided within *dstSizePtr (necessarily <= original value).
+ *
+ *  The number of bytes consumed from srcBuffer is provided within *srcSizePtr (necessarily <= original value).
+ *  Number of bytes consumed can be < number of bytes provided.
+ *  It typically happens when dstBuffer is not large enough to contain all decoded data.
+ *  Unconsumed source data must be presented again in subsequent invocations.
+ *
+ * `dstBuffer` content is expected to be flushed between each invocation, as its content will be overwritten.
+ * `dstBuffer` itself can be changed at will between each consecutive function invocation.
+ *
+ * @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+ *  This is just a hint though, it's always possible to provide any srcSize.
+ *  When a frame is fully decoded, @return will be 0 (no more data expected).
+ *  If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ *
+ *  After a frame is fully decoded, dctx can be used again to decompress another frame.
+ *  After a decompression error, use LZ4F_resetDecompressionContext() before re-using dctx, to return to clean state.
+ */
+LZ4FLIB_API size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                                   void* dstBuffer, size_t* dstSizePtr,
+                                   const void* srcBuffer, size_t* srcSizePtr,
+                                   const LZ4F_decompressOptions_t* dOptPtr);
+
+
+/*! LZ4F_resetDecompressionContext() : v1.8.0
+ *  In case of an error, the context is left in "undefined" state.
+ *  In which case, it's necessary to reset it, before re-using it.
+ *  This method can also be used to abruptly stop an unfinished decompression,
+ *  and start a new one using the same context. */
+LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   /* always successful */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* LZ4F_H_09782039843 */
diff --git a/lz4/lz4frame_static.h b/lz4/lz4frame_static.h
new file mode 100644
index 0000000..1899f8e
--- /dev/null
+++ b/lz4/lz4frame_static.h
@@ -0,0 +1,143 @@
+/*
+   LZ4 auto-framing library
+   Header File for static linking only
+   Copyright (C) 2011-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#ifndef LZ4FRAME_STATIC_H_0398209384
+#define LZ4FRAME_STATIC_H_0398209384
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* lz4frame_static.h should be used solely in the context of static linking.
+ * It contains definitions which are not stable and may change in the future.
+ * Never use it in the context of DLL linking.
+ */
+
+
+/* ---   Dependency   --- */
+#include "lz4frame.h"
+
+
+/* ---   Error List   --- */
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) \
+        ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) \
+        ITEM(ERROR_blockMode_invalid) \
+        ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) \
+        ITEM(ERROR_blockChecksum_invalid) \
+        ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) \
+        ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) \
+        ITEM(ERROR_frameType_unknown) \
+        ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) \
+        ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_frameDecoding_alreadyStarted) \
+        ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+
+/* enum list is exposed, to handle specific errors */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+
+LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+
+
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4F_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ *  If cdict==NULL, compress without a dictionary.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  If this condition is not respected, function will fail (@return an errorCode).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError()) */
+size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const LZ4F_CDict* cdict,
+                               const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code (which can be tested using LZ4F_isError()) */
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
+                                     void* dstBuffer, size_t dstCapacity,
+                                     const LZ4F_CDict* cdict,
+                                     const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding. */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4FRAME_STATIC_H_0398209384 */
diff --git a/lz4/lz4hc.c b/lz4/lz4hc.c
index f28283f..22eb071 100644
--- a/lz4/lz4hc.c
+++ b/lz4/lz4hc.c
@@ -1,670 +1,387 @@
 /*
-   LZ4 HC - High Compression Mode of LZ4
-   Copyright (C) 2011-2013, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2017, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/lz4/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
 
-//**************************************
-// Memory routines
-//**************************************
-#include <stdlib.h>   // calloc, free
-#define ALLOCATOR(s)  calloc(1,s)
-#define FREEMEM       free
-#include <string.h>   // memset, memcpy
-#define MEM_INIT      memset
-
-
-//**************************************
-// CPU Feature Detection
-//**************************************
-// 32 or 64 bits ?
-#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
-  || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
-  || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
-  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   // Detects 64 bits mode
-#  define LZ4_ARCH64 1
-#else
-#  define LZ4_ARCH64 0
-#endif
-
-// Little Endian or Big Endian ?
-// Overwrite the #define below if you know your architecture endianess
-#if defined (__GLIBC__)
-#  include <endian.h>
-#  if (__BYTE_ORDER == __BIG_ENDIAN)
-#     define LZ4_BIG_ENDIAN 1
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define LZ4_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
-   || defined(__hpux)  || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define LZ4_BIG_ENDIAN 1
-#else
-// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
-#endif
-
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
-#if defined(__ARM_FEATURE_UNALIGNED)
-#  define LZ4_FORCE_UNALIGNED_ACCESS 1
-#endif
-
-// Define this parameter if your target system or compiler does not support hardware bit count
-#if defined(_MSC_VER) && defined(_WIN32_WCE)            // Visual Studio for Windows CE does not support Hardware bit count
-#  define LZ4_FORCE_SW_BITCOUNT
-#endif
-
-
-//**************************************
-// Compiler Options
-//**************************************
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
-  /* "restrict" is a known keyword */
-#else
-#  define restrict  // Disable restrict
-#endif
 
-#ifdef _MSC_VER    // Visual Studio
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    // For Visual 2005
-#  if LZ4_ARCH64   // 64-bits
-#    pragma intrinsic(_BitScanForward64) // For Visual 2005
-#    pragma intrinsic(_BitScanReverse64) // For Visual 2005
-#  else            // 32-bits
-#    pragma intrinsic(_BitScanForward)   // For Visual 2005
-#    pragma intrinsic(_BitScanReverse)   // For Visual 2005
-#  endif
-#  pragma warning(disable : 4127)        // disable: C4127: conditional expression is constant
-#  pragma warning(disable : 4701)        // disable: C4701: potentially uninitialized local variable used
-#else
-#  ifdef __GNUC__
-#    define FORCE_INLINE static inline __attribute__((always_inline))
-#  else
-#    define FORCE_INLINE static inline
-#  endif
-#endif
+/* *************************************
+*  Tuning Parameter
+***************************************/
 
-#ifdef _MSC_VER  // Visual Studio
-#  define lz4_bswap16(x) _byteswap_ushort(x)
-#else
-#  define lz4_bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+/*! LZ4HC_HEAPMODE :
+ *  Selects where the default compression function allocates its working memory :
+ *  on the stack (0 : fastest), or on the heap (1 : requires malloc()).
+ *  The working memory is rather large, so heap mode (1) is the default; define LZ4HC_HEAPMODE beforehand to override it.
+ */
+#ifndef LZ4HC_HEAPMODE
+#  define LZ4HC_HEAPMODE 1
 #endif
 
 
-//**************************************
-// Includes
-//**************************************
+/*===    Dependency    ===*/
 #include "lz4hc.h"
-#include "lz4.h"
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
-# include <stdint.h>
-  typedef uint8_t  BYTE;
-  typedef uint16_t U16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-#else
-  typedef unsigned char       BYTE;
-  typedef unsigned short      U16;
-  typedef unsigned int        U32;
-  typedef   signed int        S32;
-  typedef unsigned long long  U64;
-#endif
 
-#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
-#  define _PACKED __attribute__ ((packed))
-#else
-#  define _PACKED
-#endif
 
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  ifdef __IBMC__
-#    pragma pack(1)
-#  else
-#    pragma pack(push, 1)
-#  endif
+/*===   Common LZ4 definitions   ===*/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
 #endif
-
-typedef struct _U16_S { U16 v; } _PACKED U16_S;
-typedef struct _U32_S { U32 v; } _PACKED U32_S;
-typedef struct _U64_S { U64 v; } _PACKED U64_S;
-
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  pragma pack(pop)
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
-#define A64(x) (((U64_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A16(x) (((U16_S *)(x))->v)
+#define LZ4_COMMONDEFS_ONLY
+#include "lz4.c"   /* LZ4_count, constants, mem */
 
 
-//**************************************
-// Constants
-//**************************************
-#define MINMATCH 4
-
-#define DICTIONARY_LOGSIZE 16
-#define MAXD (1<<DICTIONARY_LOGSIZE)
-#define MAXD_MASK ((U32)(MAXD - 1))
-#define MAX_DISTANCE (MAXD - 1)
-
-#define HASH_LOG (DICTIONARY_LOGSIZE-1)
-#define HASHTABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASHTABLESIZE - 1)
-
-#define MAX_NB_ATTEMPTS 256
-
-#define ML_BITS  4
-#define ML_MASK  (size_t)((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
-
-#define COPYLENGTH 8
-#define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
-#define MINLENGTH (MFLIMIT+1)
+/*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
 
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
-
-
-//**************************************
-// Architecture-specific macros
-//**************************************
-#if LZ4_ARCH64   // 64-bit
-#  define STEPSIZE 8
-#  define LZ4_COPYSTEP(s,d)     A64(d) = A64(s); d+=8; s+=8;
-#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d)
-#  define UARCH U64
-#  define AARCH A64
-#  define HTYPE                 U32
-#  define INITBASE(b,s)         const BYTE* const b = s
-#else   // 32-bit
-#  define STEPSIZE 4
-#  define LZ4_COPYSTEP(s,d)     A32(d) = A32(s); d+=4; s+=4;
-#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
-#  define UARCH U32
-#  define AARCH A32
-//#  define HTYPE                 const BYTE*
-//#  define INITBASE(b,s)         const int b = 0
-#  define HTYPE                 U32
-#  define INITBASE(b,s)         const BYTE* const b = s
-#endif
 
-#if defined(LZ4_BIG_ENDIAN)
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
-#else   // Little Endian
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
-#endif
-
-
-//************************************************************
-// Local Types
-//************************************************************
-typedef struct
-{
-    const BYTE* inputBuffer;
-    const BYTE* base;
-    const BYTE* end;
-    HTYPE hashTable[HASHTABLESIZE];
-    U16 chainTable[MAXD];
-    const BYTE* nextToUpdate;
-} LZ4HC_Data_Structure;
-
-
-//**************************************
-// Macros
-//**************************************
-#define LZ4_WILDCOPY(s,d,e)    do { LZ4_COPYPACKET(s,d) } while (d<e);
-#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
-#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
-#define HASH_VALUE(p)          HASH_FUNCTION(A32(p))
-#define HASH_POINTER(p)        (HashTable[HASH_VALUE(p)] + base)
-#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]
-#define GETNEXT(p)             ((p) - (size_t)DELTANEXT(p))
-
-
-//**************************************
-// Private functions
-//**************************************
-#if LZ4_ARCH64
-
-FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse64( &r, val );
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clzll(val) >> 3);
-#  else
-    int r;
-    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
-    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-    r += (!val);
-    return r;
-#  endif
-#else
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanForward64( &r, val );
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctzll(val) >> 3);
-#  else
-    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
-    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
-#  endif
-#endif
-}
-
-#else
-
-FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanReverse( &r, val );
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clz(val) >> 3);
-#  else
-    int r;
-    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-    r += (!val);
-    return r;
-#  endif
-#else
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanForward( &r, val );
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctz(val) >> 3);
-#  else
-    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
-    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-#  endif
-#endif
-}
+/*===   Macros   ===*/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )   /* note : the selected argument is evaluated twice */
+#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )   /* note : the selected argument is evaluated twice */
+#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))   /* multiplicative hash : the low bits of the product are shifted out */
+#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent; expects a local named `chainTable` */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster : slot is selected by `pos` truncated to 16 bits */
 
-#endif
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }   /* hash of the value LZ4_read32() reads at ptr; callers use it to index hashTable */
 
 
-int LZ4_sizeofStreamStateHC()
-{
-    return sizeof(LZ4HC_Data_Structure);
-}
 
-FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)   /* resets both tables and all indexes; `start` is mapped to index 64 KB */
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
-    hc4->nextToUpdate = base + 1;
-    hc4->base = base;
-    hc4->inputBuffer = base;
-    hc4->end = base;
-}
-
-int LZ4_resetStreamStateHC(void* state, const char* inputBuffer)
-{
-    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   // Error : pointer is not aligned for pointer (32 or 64 bits)
-    LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
-    return 0;
-}
-
-
-void* LZ4_createHC (const char* inputBuffer)
-{
-    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
-    LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
-    return hc4;
-}
-
-
-int LZ4_freeHC (void* LZ4HC_Data)
-{
-    FREEMEM(LZ4HC_Data);
-    return (0);
+    hc4->nextToUpdate = 64 KB;       /* first index to insert : the index of `start`, since base + 64 KB == start */
+    hc4->base = start - 64 KB;       /* positions are stored as offsets from base, so `start` has index 64 KB */
+    hc4->end = start;                /* nothing has been consumed yet */
+    hc4->dictBase = start - 64 KB;   /* same origin as base */
+    hc4->dictLimit = 64 KB;          /* indexes >= dictLimit are addressed through base by the match finders */
+    hc4->lowLimit = 64 KB;           /* == dictLimit : no index lies in [lowLimit, dictLimit), so the dictBase branch is not reachable yet */
 }
 
 
-// Update chains up to ip (excluded)
-FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+/* Update chains up to ip (excluded) : inserts every index in [hc4->nextToUpdate, ip - base) into hashTable and chainTable */
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 {
-    U16*   chainTable = hc4->chainTable;
-    HTYPE* HashTable  = hc4->hashTable;
-    INITBASE(base,hc4->base);
-
-    while(hc4->nextToUpdate < ip)
-    {
-        const BYTE* const p = hc4->nextToUpdate;
-        size_t delta = (p) - HASH_POINTER(p);
+    U16* const chainTable = hc4->chainTable;
+    U32* const hashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    U32 const target = (U32)(ip - base);   /* index of ip (offset from base) */
+    U32 idx = hc4->nextToUpdate;           /* first index not inserted yet */
+
+    while (idx < target) {
+        U32 const h = LZ4HC_hashPtr(base+idx);   /* hash of the bytes read at this position */
+        size_t delta = idx - hashTable[h];       /* distance back to the previous head of this hash slot; clamped below before the U16 store */
         if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
-        DELTANEXT(p) = (U16)delta;
-        HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
-        hc4->nextToUpdate++;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;   /* chain link : how far back the previous candidate is */
+        hashTable[h] = idx;                           /* idx becomes the head of the chain for h */
+        idx++;
     }
-}
-
 
-char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
-{
-    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
-    U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
-    distance = (distance >> 16) << 16;   // Must be a multiple of 64 KB
-    LZ4HC_Insert(hc4, hc4->end - MINMATCH);
-    memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
-    hc4->nextToUpdate -= distance;
-    hc4->base -= distance;
-    if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB)   // Avoid overflow
-    {
-        int i;
-        hc4->base += 1 GB;
-        for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
-    }
-    hc4->end -= distance;
-    return (char*)(hc4->end);
-}
-
-
-FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
-{
-    const BYTE* p1t = p1;
-
-    while (p1t<matchlimit-(STEPSIZE-1))
-    {
-        UARCH diff = AARCH(p2) ^ AARCH(p1t);
-        if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
-        p1t += LZ4_NbCommonBytes(diff);
-        return (p1t - p1);
-    }
-    if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
-    if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
-    if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
-    return (p1t - p1);
+    hc4->nextToUpdate = target;   /* set to the index of ip, even when the loop above did not run */
 }
 
 
-FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4,   /* Index tables will be updated : LZ4HC_Insert(hc4, ip) is called first */
+                                               const BYTE* const ip, const BYTE* const iLimit,   /* ip : position to match; iLimit : passed to LZ4_count() as the end limit on the input side */
+                                               const BYTE** matchpos,   /* out : start of the best match; written only when a match is found */
+                                               const int maxNbAttempts)   /* maximum number of chain candidates examined */
 {
     U16* const chainTable = hc4->chainTable;
-    HTYPE* const HashTable = hc4->hashTable;
-    const BYTE* ref;
-    INITBASE(base,hc4->base);
-    int nbAttempts=MAX_NB_ATTEMPTS;
-    size_t repl=0, ml=0;
-    U16 delta=0;  // useless assignment, to remove an uninitialization warning
-
-    // HC4 match finder
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);   /* lowest accepted index : never below hc4->lowLimit, and at most 64 KB - 1 behind ip */
+    U32 matchIndex;
+    int nbAttempts = maxNbAttempts;
+    size_t ml = 0;   /* best match length so far; this is the return value (0 when no match was found) */
+
+    /* HC4 match finder */
     LZ4HC_Insert(hc4, ip);
-    ref = HASH_POINTER(ip);
-
-#define REPEAT_OPTIMIZATION
-#ifdef REPEAT_OPTIMIZATION
-    // Detect repetitive sequences of length <= 4
-    if ((U32)(ip-ref) <= 4)        // potential repetition
-    {
-        if (A32(ref) == A32(ip))   // confirmed
-        {
-            delta = (U16)(ip-ref);
-            repl = ml  = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
-            *matchpos = ref;
-        }
-        ref = GETNEXT(ref);
-    }
-#endif
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];   /* head of the chain for ip's hash (ip itself is not inserted yet) */
 
-    while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
-    {
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {   /* walk the chain towards lower indexes */
         nbAttempts--;
-        if (*(ref+ml) == *(ip+ml))
-        if (A32(ref) == A32(ip))
-        {
-            size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
-            if (mlt > ml) { ml = mlt; *matchpos = ref; }
-        }
-        ref = GETNEXT(ref);
-    }
-
-#ifdef REPEAT_OPTIMIZATION
-    // Complete table
-    if (repl)
-    {
-        const BYTE* ptr = ip;
-        const BYTE* end;
-
-        end = ip + repl - (MINMATCH-1);
-        while(ptr < end-delta)
-        {
-            DELTANEXT(ptr) = delta;    // Pre-Load
-            ptr++;
+        if (matchIndex >= dictLimit) {   /* candidate is addressed through base */
+            const BYTE* const match = base + matchIndex;
+            if ( (*(match+ml) == *(ip+ml))   /* can be longer : cheap one-byte filter at offset ml before the full comparison */
+               && (LZ4_read32(match) == LZ4_read32(ip)) )
+            {
+                size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        } else {   /* candidate index is below dictLimit : addressed through dictBase */
+            const BYTE* const match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip)) {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);   /* input position at which the match side reaches index dictLimit */
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))   /* matched up to index dictLimit : continue counting from base+dictLimit */
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos : base-relative, whereas the bytes were read at dictBase + matchIndex */
+            }
         }
-        do
-        {
-            DELTANEXT(ptr) = delta;
-            HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base);     // Head of chain
-            ptr++;
-        } while(ptr < end);
-        hc4->nextToUpdate = end;
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex);   /* follow the chain link to the next (lower) candidate index */
     }
-#endif
 
     return (int)ml;
 }
 
 
-FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (   /* searches for a match longer than `longest`; the match may be extended backward, down to iLowLimit */
+    LZ4HC_CCtx_internal* hc4,   /* tables are updated : LZ4HC_Insert(hc4, ip) is called first */
+    const BYTE* const ip,
+    const BYTE* const iLowLimit,    /* backward extension never moves the start of the match below this input position */
+    const BYTE* const iHighLimit,   /* passed to LZ4_count() as the end limit on the input side */
+    int longest,                    /* length to beat; returned unchanged when no longer match is found */
+    const BYTE** matchpos,          /* out : start of the match; written only when a longer match is found */
+    const BYTE** startpos,          /* out : input position where that match starts; written together with *matchpos */
+    const int maxNbAttempts)        /* maximum number of chain candidates examined */
 {
-    U16* const  chainTable = hc4->chainTable;
-    HTYPE* const HashTable = hc4->hashTable;
-    INITBASE(base,hc4->base);
-    const BYTE*  ref;
-    int nbAttempts = MAX_NB_ATTEMPTS;
-    int delta = (int)(ip-startLimit);
-
-    // First Match
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;   /* lowest address reachable through base; bounds the backward extension below */
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);   /* lowest accepted index : never below hc4->lowLimit, and at most 64 KB - 1 behind ip */
+    const BYTE* const dictBase = hc4->dictBase;
+    int const delta = (int)(ip-iLowLimit);   /* distance from iLowLimit to ip */
+    int nbAttempts = maxNbAttempts;
+    U32 matchIndex;
+
+
+    /* First Match */
     LZ4HC_Insert(hc4, ip);
-    ref = HASH_POINTER(ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];   /* head of the chain for ip's hash */
 
-    while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
-    {
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {   /* walk the chain towards lower indexes */
         nbAttempts--;
-        if (*(startLimit + longest) == *(ref - delta + longest))
-        if (A32(ref) == A32(ip))
-        {
-#if 1
-            const BYTE* reft = ref+MINMATCH;
-            const BYTE* ipt = ip+MINMATCH;
-            const BYTE* startt = ip;
-
-            while (ipt<matchlimit-(STEPSIZE-1))
-            {
-                UARCH diff = AARCH(reft) ^ AARCH(ipt);
-                if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
-                ipt += LZ4_NbCommonBytes(diff);
-                goto _endCount;
-            }
-            if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
-            if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
-            if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
-_endCount:
-            reft = ref;
-#else
-            // Easier for code maintenance, but unfortunately slower too
-            const BYTE* startt = ip;
-            const BYTE* reft = ref;
-            const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
-#endif
-
-            while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}
+        if (matchIndex >= dictLimit) {   /* candidate is addressed through base */
+            const BYTE* const matchPtr = base + matchIndex;
+            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {   /* cheap one-byte filter at offset `longest`, both sides shifted back by delta */
+                if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    int back = 0;   /* <= 0 : backward extension, kept as a negative byte count */
+
+                    while ( (ip+back > iLowLimit)
+                         && (matchPtr+back > lowPrefixPtr)
+                         && (ip[back-1] == matchPtr[back-1])) {
+                            back--;
+                    }
 
-            if ((ipt-startt) > longest)
-            {
-                longest = (int)(ipt-startt);
-                *matchpos = reft;
-                *startpos = startt;
+                    mlt -= back;   /* back <= 0 : adds the bytes gained backward */
+
+                    if (mlt > longest) {
+                        longest = mlt;
+                        *matchpos = matchPtr+back;
+                        *startpos = ip+back;
+            }   }   }
+        } else {   /* candidate index is below dictLimit : addressed through dictBase */
+            const BYTE* const matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                int mlt;
+                int back=0;   /* <= 0 : backward extension, kept as a negative byte count */
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);   /* input position at which the match side reaches index dictLimit */
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))   /* matched up to index dictLimit : continue counting from base+dictLimit */
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;   /* backward extension, bounded by iLowLimit and by index lowLimit */
+                mlt -= back;   /* back <= 0 : adds the bytes gained backward */
+                if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }   /* *matchpos is base-relative, whereas the bytes were read through dictBase */
             }
         }
-        ref = GETNEXT(ref);
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex);   /* follow the chain link to the next (lower) candidate index */
     }
 
     return longest;
 }
 
 
-typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum {
+    noLimit = 0,           /* zero : LZ4HC_encodeSequence() skips its output-limit checks */
+    limitedOutput = 1,     /* non-zero : LZ4HC_encodeSequence() checks the output position against oend */
+    limitedDestSize = 2,   /* non-zero as well (same checks); LZ4HC_compress_hashChain() also lowers oend by LASTLITERALS in this mode */
+} limitedOutput_directive;
 
+#ifndef LZ4HC_DEBUG
+#  define LZ4HC_DEBUG 0   /* non-zero : LZ4HC_encodeSequence() printf()s each sequence; NOTE(review): confirm <stdio.h> is in scope before enabling */
+#endif
+
+/* LZ4HC_encodeSequence() : writes one sequence at *op (token, literals [*anchor, *ip), 2-byte offset, match length), then sets *ip and *anchor past the match.
+ * @return : 0 if ok,
+ *           1 if `limit` is non-zero and an output-limit check against oend fails (note : *op has already been advanced when this happens) */
 FORCE_INLINE int LZ4HC_encodeSequence (
-                       const BYTE** ip,
-                       BYTE** op,
-                       const BYTE** anchor,
-                       int matchLength,
-                       const BYTE* ref,
-                       limitedOutput_directive limitedOutputBuffer,
-                       BYTE* oend)
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limit,
+    BYTE* oend)
 {
-    int length;
-    BYTE* token;
-
-    // Encode Literal length
-    length = (int)(*ip - *anchor);
-    token = (*op)++;
-    if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1;   // Check output limit
-    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
-    else *token = (BYTE)(length<<ML_BITS);
+    size_t length;
+    BYTE* const token = (*op)++;   /* token byte is reserved first : literal length goes above ML_BITS, match length code is added below */
 
-    // Copy Literals
-    LZ4_BLINDCOPY(*anchor, *op, length);
+#if LZ4HC_DEBUG
+    printf("literal : %u  --  match : %u  --  offset : %u\n",
+           (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+#endif
 
-    // Encode Offset
-    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+    /* Encode Literal length */
+    length = (size_t)(*ip - *anchor);
+    if ((limit) && ((*op + (length >> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length >= RUN_MASK) {   /* token field saturated : remainder is emitted as 255-valued bytes plus one final byte < 255 */
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *(*op)++ = 255;
+        *(*op)++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
 
-    // Encode MatchLength
-    length = (int)(matchLength-MINMATCH);
-    if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1;   // Check output limit
-    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
-    else *token += (BYTE)(length);
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);   /* NOTE(review): the name suggests it may write a few bytes past *op + length -- confirm in lz4.c */
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;   /* offset = distance from match to *ip, truncated to 16 bits */
+
+    /* Encode MatchLength */
+    length = (size_t)(matchLength - MINMATCH);   /* the match length is stored minus MINMATCH */
+    if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length >= ML_MASK) {   /* token field saturated : remainder is emitted as 255-valued bytes plus one final byte < 255 */
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }   /* two 255-valued bytes per iteration */
+        if (length >= 255) { length -= 255; *(*op)++ = 255; }
+        *(*op)++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
 
-    // Prepare next loop
+    /* Prepare next loop */
     *ip += matchLength;
     *anchor = *ip;
 
     return 0;
 }
 
+/* btopt */
+#include "lz4opt.h"
 
-static int LZ4HC_compress_generic (
-                 void* ctxvoid,
-                 const char* source,
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize,
-                 limitedOutput_directive limit
-                )
+
+static int LZ4HC_compress_hashChain (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int* srcSizePtr,
+    int const maxOutputSize,
+    unsigned maxNbAttempts,
+    limitedOutput_directive limit
+    )
 {
-    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const int inputSize = *srcSizePtr;
+
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
     const BYTE* const iend = ip + inputSize;
     const BYTE* const mflimit = iend - MFLIMIT;
     const BYTE* const matchlimit = (iend - LASTLITERALS);
 
+    BYTE* optr = (BYTE*) dest;
     BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
+    BYTE* oend = op + maxOutputSize;
 
     int   ml, ml2, ml3, ml0;
-    const BYTE* ref=NULL;
-    const BYTE* start2=NULL;
-    const BYTE* ref2=NULL;
-    const BYTE* start3=NULL;
-    const BYTE* ref3=NULL;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
     const BYTE* start0;
     const BYTE* ref0;
 
+    /* init */
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize && maxOutputSize < 1) return 0;         /* Impossible to store anything */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;              /* Unsupported input size, too large (or negative) */
 
-    // Ensure blocks follow each other
-    if (ip != ctx->end) return 0;
     ctx->end += inputSize;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;                  /* Hack for support limitations LZ4 decompressor */
+    if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     ip++;
 
-    // Main Loop
-    while (ip < mflimit)
-    {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
+    /* Main Loop */
+    while (ip < mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
         if (!ml) { ip++; continue; }
 
-        // saved, in case we would skip too much
+        /* saved, in case we would skip too much */
         start0 = ip;
         ref0 = ref;
         ml0 = ml;
 
 _Search2:
         if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
-        else ml2 = ml;
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else
+            ml2 = ml;
 
-        if (ml2 == ml)  // No better match
-        {
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+        if (ml2 == ml) { /* No better match */
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
             continue;
         }
 
-        if (start0 < ip)
-        {
-            if (start2 < ip + ml0)   // empirical
-            {
+        if (start0 < ip) {
+            if (start2 < ip + ml0) {  /* empirical */
                 ip = start0;
                 ref = ref0;
                 ml = ml0;
             }
         }
 
-        // Here, start0==ip
-        if ((start2 - ip) < 3)   // First Match too small : removed
-        {
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3) {  /* First Match too small : removed */
             ml = ml2;
             ip = start2;
             ref =ref2;
@@ -672,59 +389,56 @@ static int LZ4HC_compress_generic (
         }
 
 _Search3:
-        // Currently we have :
-        // ml2 > ml1, and
-        // ip1+3 <= ip2 (usually < ip1+ml1)
-        if ((start2 - ip) < OPTIMAL_ML)
-        {
+        /* At this stage, we have :
+        *  ml2 > ml1, and
+        *  ip1+3 <= ip2 (usually < ip1+ml1) */
+        if ((start2 - ip) < OPTIMAL_ML) {
             int correction;
             int new_ml = ml;
             if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
             if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
             correction = new_ml - (int)(start2 - ip);
-            if (correction > 0)
-            {
+            if (correction > 0) {
                 start2 += correction;
                 ref2 += correction;
                 ml2 -= correction;
             }
         }
-        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
         if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
-        else ml3 = ml2;
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else
+            ml3 = ml2;
 
-        if (ml3 == ml2) // No better match : 2 sequences to encode
-        {
-            // ip & ref are known; Now for ml
+        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
+            /* ip & ref are known; Now for ml */
             if (start2 < ip+ml)  ml = (int)(start2 - ip);
-            // Now, encode 2 sequences
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            /* Now, encode 2 sequences */
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
             ip = start2;
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow;
             continue;
         }
 
-        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
-        {
-            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
-            {
-                if (start2 < ip+ml)
-                {
+        if (start3 < ip+ml+3) {  /* Not enough space for match 2 : remove it */
+            if (start3 >= (ip+ml)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+                if (start2 < ip+ml) {
                     int correction = (int)(ip+ml - start2);
                     start2 += correction;
                     ref2 += correction;
                     ml2 -= correction;
-                    if (ml2 < MINMATCH)
-                    {
+                    if (ml2 < MINMATCH) {
                         start2 = start3;
                         ref2 = ref3;
                         ml2 = ml3;
                     }
                 }
 
-                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                optr = op;
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
                 ip  = start3;
                 ref = ref3;
                 ml  = ml3;
@@ -741,29 +455,27 @@ static int LZ4HC_compress_generic (
             goto _Search3;
         }
 
-        // OK, now we have 3 ascending matches; let's write at least the first one
-        // ip & ref are known; Now for ml
-        if (start2 < ip+ml)
-        {
-            if ((start2 - ip) < (int)ML_MASK)
-            {
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml) {
+            if ((start2 - ip) < (int)ML_MASK) {
                 int correction;
                 if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                 if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                 correction = ml - (int)(start2 - ip);
-                if (correction > 0)
-                {
+                if (correction > 0) {
                     start2 += correction;
                     ref2 += correction;
                     ml2 -= correction;
                 }
-            }
-            else
-            {
+            } else {
                 ml = (int)(start2 - ip);
             }
         }
-        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+        optr = op;
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
 
         ip = start2;
         ref = ref2;
@@ -774,83 +486,322 @@ static int LZ4HC_compress_generic (
         ml2 = ml3;
 
         goto _Search3;
-
     }
 
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  // Check output limit
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + litLength + lastRunSize;
+        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;  /* Check output limit */
+            /* adapt lastRunSize to fill 'dest' */
+            lastRunSize  = (size_t)(oend - op) - 1;
+            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+            lastRunSize -= litLength;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
     }
 
-    // End
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip) - source);
     return (int) (((char*)op)-dest);
+
+_dest_overflow:
+    if (limit == limitedDestSize) {
+        op = optr;  /* restore correct out pointer */
+        goto _last_literals;
+    }
+    return 0;
 }
 
+static int LZ4HC_getSearchNum(int compressionLevel)   /* value stored into the context's searchNum field for a given level */
+{
+    switch (compressionLevel) {
+        default: return 0; /* unused : every level other than 11 and 12 maps to 0 */
+        case 11: return 128;
+        case 12: return 1<<10;   /* 1024 */
+    }
+}
 
-int LZ4_compressHC(const char* source, char* dest, int inputSize)
+static int LZ4HC_compress_generic (   /* routes one block to LZ4HC_compress_hashChain() or LZ4HC_compress_optimal(), depending on cLevel */
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,   /* handed over as a pointer to LZ4HC_compress_hashChain(), but dereferenced (passed by value) for LZ4HC_compress_optimal() */
+    int const dstCapacity,
+    int cLevel,   /* values < 1 select LZ4HC_CLEVEL_DEFAULT ; values > 12 are handled as 12 (or as 10 in limitedDestSize mode) */
+    limitedOutput_directive limit
+    )
 {
-    void* ctx = LZ4_createHC(source);
-    int result;
-    if (ctx==NULL) return 0;
+    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe to reconsider */
+    if (cLevel > 9) {
+        if (limit == limitedDestSize) cLevel = 10;   /* limitedDestSize is always served by the hash-chain call of case 10 below */
+        switch (cLevel) {
+            case 10:
+                return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << 12, limit);
+            case 11:
+                ctx->searchNum = LZ4HC_getSearchNum(cLevel);
+                return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 128, 0);
+            default:
+                cLevel = 12;
+                /* fall-through */
+            case 12:
+                ctx->searchNum = LZ4HC_getSearchNum(cLevel);
+                return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, LZ4_OPT_NUM, 1);
+        }
+    }
+    return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit);  /* levels 1-9 */
+}
 
-    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, noLimit);
 
-    LZ4_freeHC(ctx);
-    return result;
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }   /* byte size of the `state` object that LZ4_compress_HC_extStateHC() casts to LZ4_streamHC_t* */
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)   /* one-shot HC compression using caller-provided state ; returns 0 on error */
+{
+    LZ4HC_CCtx_internal* const ctx = (state == NULL) ? NULL : &((LZ4_streamHC_t*)state)->internal_donotuse;
+    if ((ctx == NULL) || (((size_t)(state)&(sizeof(void*)-1)) != 0)) return 0;   /* Error : state is NULL, or is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init (ctx, (const BYTE*)src);
+    if (dstCapacity < LZ4_compressBound(srcSize))   /* dst may be too small : compress with output-limit checks */
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
+    else   /* dst is at least LZ4_compressBound(srcSize) : no limit checks requested */
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit);
 }
 
-int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)   /* one-shot HC compression ; returns LZ4_compress_HC_extStateHC()'s result, or 0 if the state cannot be allocated */
 {
-    void* ctx = LZ4_createHC(source);
-    int result;
-    if (ctx==NULL) return 0;
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));   /* state on heap ; may be NULL */
+#else
+    LZ4_streamHC_t state;   /* state on stack */
+    LZ4_streamHC_t* const statePtr = &state;
+#endif
+    int const cSize = (statePtr == NULL) ? 0 : LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);   /* 0 (failure) when the heap allocation failed */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    free(statePtr);   /* free(NULL) is a no-op */
+#endif
+    return cSize;
+}
+
+/* LZ4_compress_HC_destSize() :
+ * Currently only compatible with the Hash Chain implementation,
+ * hence LZ4HC_compress_generic() turns any level above 9 into 10 (LZ4HC_CLEVEL_OPT_MIN-1) in limitedDestSize mode. */
+int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;   /* note : unlike LZ4_compress_HC_extStateHC(), the alignment of LZ4HC_Data is not checked here */
+    LZ4HC_init(ctx, (const BYTE*) source);
+    return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize);
+}
+
 
-    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, limitedOutput);
 
-    LZ4_freeHC(ctx);
-    return result;
+/**************************************
+*  Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)calloc(1, sizeof(LZ4_streamHC_t)); }   /* zero-filled (NULL on failure) : `base` then reads as NULL, which triggers auto-init in LZ4_compressHC_continue_generic(), and level 0 is handled as LZ4HC_CLEVEL_DEFAULT ; assumes a null pointer is all-bits-zero */
+int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {   /* releases a state with free() ; always returns 0 */
+    if (!LZ4_streamHCPtr) return 0;  /* support free on NULL */
+    free(LZ4_streamHCPtr);
+    return 0;
 }
 
 
-//*****************************
-// Using an external allocation
-//*****************************
+/* initialization */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)   /* prepares a streaming state for a new stream at the given level */
+{
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    LZ4_streamHCPtr->internal_donotuse.base = NULL;   /* a NULL base makes LZ4_compressHC_continue_generic() call LZ4HC_init() on next use */
+    if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;  /* cap compression level */
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;   /* values < 1 are stored as-is ; LZ4HC_compress_generic() maps them to LZ4HC_CLEVEL_DEFAULT */
+    LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
+}
+
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)   /* changes the level of an existing state, clamped to the range of the parser family currently in use */
+{
+    int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel;
+    int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN;   /* lower bound : 1 below the optimal levels, LZ4HC_CLEVEL_OPT_MIN otherwise */
+    int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX;   /* matching upper bound */
+    compressionLevel = MAX(compressionLevel, minCLevel);   /* raise to the lower bound (MIN here would discard the requested level) */
+    compressionLevel = MIN(compressionLevel, maxCLevel);   /* cap to the upper bound */
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
+}
+
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)   /* returns the dictionary size actually retained (at most 64 KB) */
+{
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    if (dictSize > 64 KB) {   /* keep only the last 64 KB of the dictionary */
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)   /* levels >= LZ4HC_CLEVEL_OPT_MIN reference the dictionary through LZ4HC_updateBinTree() */
+        LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
+    else   /* lower levels use LZ4HC_Insert(), and only when at least 4 bytes are available */
+        if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+    return dictSize;
+}
+
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)   /* turns the current prefix into the external dictionary and restarts the prefix at newBlock */
+{
+    if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
+        LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
+    else
+        if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);   /* index of the end of the former prefix */
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;   /* chosen so that newBlock sits at index dictLimit */
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
+
+static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,   /* shared streaming entry : updates the context for the new block, then calls LZ4HC_compress_generic() at the stored level */
+                                            const char* src, char* dst,
+                                            int* srcSizePtr, int dstCapacity,
+                                            limitedOutput_directive limit)
+{
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    /* auto-init if forgotten : a NULL base (as set by LZ4_resetStreamHC()) marks a context not yet initialized */
+    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src);
+
+    /* Check overflow : beyond 2 GB between base and end, reload the last (up to 64 KB) of the current prefix through LZ4_loadDictHC() */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+        LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other ; when they do not, previous data becomes the external dictionary */
+    if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
+
+    /* Check overlapping input/dictionary space : the part of the external dictionary covered by the new input is no longer referenced */
+    {   const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
+        const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);   /* dictionary now starts after the overlapped area */
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;   /* fewer than 4 bytes left : drop the dictionary entirely */
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)   /* streaming compression of one block ; picks the limit mode from dstCapacity */
+{
+    if (dstCapacity < LZ4_compressBound(srcSize))   /* dst may be too small : compress with output-limit checks */
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
+    else   /* dst is at least LZ4_compressBound(srcSize) : no limit checks requested */
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, noLimit);
+}
+
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)   /* streaming variant run in limitedDestSize mode */
+{
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src);   /* not compatible with btopt implementation : context is re-initialized on src (presumably discarding earlier history - confirm against LZ4HC_init()) */
+    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
+}
 
-int LZ4_sizeofStateHC() { return sizeof(LZ4HC_Data_Structure); }
 
 
-int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize)
+/* dictionary saving */
+
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)   /* copies the tail of the current prefix into safeBuffer and re-anchors the context on it ; returns the size saved */
 {
-    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   // Error : state is not aligned for pointers (32 or 64 bits)
-    LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
-    return LZ4HC_compress_generic (state, source, dest, inputSize, 0, noLimit);
+    LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+    int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    if (dictSize > 64 KB) dictSize = 64 KB;
+    if (dictSize < 4) dictSize = 0;   /* also covers negative requests */
+    if (dictSize > prefixSize) dictSize = prefixSize;   /* cannot save more than the current prefix holds */
+    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);   /* memmove : tolerates safeBuffer overlapping the prefix */
+    {   U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+        streamPtr->base = streamPtr->end - endIndex;   /* keeps the end index unchanged after the move */
+        streamPtr->dictLimit = endIndex - dictSize;
+        streamPtr->lowLimit = endIndex - dictSize;   /* lowLimit == dictLimit : no external dictionary segment remains */
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
+    }
+    return dictSize;
 }
 
 
-int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+/***********************************
+*  Deprecated Functions
+***********************************/
+/* These functions currently generate deprecation warnings */
+/* Deprecated compression functions : one-line wrappers over LZ4_compress_HC(), LZ4_compress_HC_extStateHC() and LZ4_compress_HC_continue(). Variants without a size limit pass LZ4_compressBound(srcSize) as capacity ; variants without cLevel pass 0, which LZ4HC_compress_generic() maps to LZ4HC_CLEVEL_DEFAULT. */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }   /* presumably the byte size reserved for a LZ4_streamHC_t (LZ4_STREAMHCSIZE is defined outside this chunk - confirm) */
+
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)   /* initializes caller-provided state on inputBuffer ; returns 0 on success, 1 if state is misaligned */
 {
-    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   // Error : state is not aligned for pointers (32 or 64 bits)
-    LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
-    return LZ4HC_compress_generic (state, source, dest, inputSize, maxOutputSize, limitedOutput);
+    LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4HC_init(ctx, (const BYTE*)inputBuffer);
+    ctx->inputBuffer = (BYTE*)inputBuffer;   /* remembered for LZ4_slideInputBufferHC() */
+    return 0;
 }
 
+void* LZ4_createHC (char* inputBuffer)   /* allocates a state with ALLOCATOR and initializes it on inputBuffer ; returns NULL on allocation failure */
+{
+    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
+    if (hc4 == NULL) return NULL;   /* not enough memory */
+    LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+    hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;   /* remembered for LZ4_slideInputBufferHC() */
+    return hc4;
+}
 
-//****************************
-// Stream functions
-//****************************
+int LZ4_freeHC (void* LZ4HC_Data) {   /* releases a state with FREEMEM ; always returns 0 */
+    if (!LZ4HC_Data) return 0;  /* support free on NULL */
+    FREEMEM(LZ4HC_Data);
+    return 0;
+}
 
-int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)   /* calls LZ4HC_compress_generic() directly, bypassing the checks of LZ4_compressHC_continue_generic() */
 {
-    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, noLimit);
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, noLimit);   /* dstCapacity is passed as 0 together with noLimit */
 }
 
-int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)   /* same direct call as LZ4_compressHC2_continue(), but bounded by dstCapacity */
 {
-    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, limitedOutput);
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);   /* always limitedOutput, whatever dstCapacity is */
 }
 
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)   /* saves up to 64 KB of history at the start of the registered input buffer ; returns the position right after it */
+{
+    LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);   /* inputBuffer was recorded by LZ4_createHC() / LZ4_resetStreamStateHC() */
+    return (char*)(hc4->inputBuffer + dictSize);
+}
diff --git a/lz4/lz4hc.h b/lz4/lz4hc.h
index 4fb1916..66d5636 100644
--- a/lz4/lz4hc.h
+++ b/lz4/lz4hc.h
@@ -1,7 +1,7 @@
 /*
    LZ4 HC - High Compression Mode of LZ4
    Header File
-   Copyright (C) 2011-2013, Yann Collet.
+   Copyright (C) 2011-2017, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -28,130 +28,251 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
-#pragma once
-
+#ifndef LZ4_HC_H_19834876238432
+#define LZ4_HC_H_19834876238432
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
+/* --- Dependency --- */
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+#include "lz4.h"   /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
 
-int LZ4_compressHC (const char* source, char* dest, int inputSize);
-/*
-LZ4_compressHC :
-    return : the number of bytes in compressed buffer dest
-             or 0 if compression fails.
-    note : destination buffer must be already allocated.
-        To avoid any problem, size it to handle worst cases situations (input data not compressible)
-        Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
-*/
 
-int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
-/*
-LZ4_compress_limitedOutput() :
-    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-    If it cannot achieve it, compression will stop, and result of the function will be zero.
-    This function never writes outside of provided output buffer.
-
-    inputSize  : Max supported value is 1 GB
-    maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
-    return : the number of output bytes written in buffer 'dest'
-             or 0 if compression fails.
-*/
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN         3
+#define LZ4HC_CLEVEL_DEFAULT     9
+#define LZ4HC_CLEVEL_OPT_MIN    11
+#define LZ4HC_CLEVEL_MAX        12
+
+
+/*-************************************
+ *  Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ *                      Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
 
 
 /* Note :
-Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
-*/
+ *   Decompression functions are provided within "lz4.h" (BSD license)
+ */
 
 
-//*****************************
-// Using an external allocation
-//*****************************
-int LZ4_sizeofStateHC();
-int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+/*! LZ4_compress_HC_extStateHC() :
+ * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
+ */
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+LZ4LIB_API int LZ4_sizeofStateHC(void);
 
-/*
-These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods.
-To know how much memory must be allocated for the compression tables, use :
-int LZ4_sizeofStateHC();
 
-Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0).
+/*-************************************
+ *  Streaming Compression
+ *  Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */
 
-The allocated memory can be provided to the compressions functions using 'void* state' parameter.
-LZ4_compress_withStateHC() and LZ4_compress_limitedOutput_withStateHC() are equivalent to previously described functions.
-They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
-*/
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ * These functions create and release memory for LZ4 HC streaming state.
+ * Newly created states are automatically initialized.
+ * Existing states can be re-used several times, using LZ4_resetStreamHC().
+ * These methods are API and ABI stable, they can be used in combination with a DLL.
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
 
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
 
-//****************************
-// Streaming Functions
-//****************************
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
 
-void* LZ4_createHC (const char* inputBuffer);
-int   LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize);
-int   LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize);
-char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
-int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
 
 /*
-These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks.
-In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function :
-
-void* LZ4_createHC (const char* inputBuffer);
-The result of the function is the (void*) pointer on the LZ4HC Data Structure.
-This pointer will be needed in all other functions.
-If the pointer returned is NULL, then the allocation has failed, and compression must be aborted.
-The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
-
-All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'.
-To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue().
-Their behavior are identical to LZ4_compressHC() or LZ4_compressHC_limitedOutput(),
-but require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one.
-If next block does not begin immediately after the previous one, the compression will fail (return 0).
-
-When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to :
-char* LZ4_slideInputBufferHC(void* LZ4HC_Data);
-must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer.
-Note that, for this function to work properly, minimum size of an input buffer must be 192KB.
-==> The memory position where the next input data block must start is provided as the result of the function.
-
-Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual.
-
-When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure.
+  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller than 64 KB.
+  Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
+
+  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
+  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+
+  Then, use LZ4_compress_HC_continue() to compress each successive block.
+  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
+  'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success,
+  because in case of failure, the API does not guarantee context recovery, and context will have to be reset.
+  If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
+
+  If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
+  you can save it to a more stable memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
 */
 
-int LZ4_sizeofStreamStateHC();
-int LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
-
-/*
-These functions achieve the same result as :
-void* LZ4_createHC (const char* inputBuffer);
 
-They are provided here to allow the user program to allocate memory using its own routines.
+ /*-*************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions.
+ * They are exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
+ **************************************/
+#define LZ4HC_DICTIONARY_LOGSIZE 17   /* because of btopt, hc would only need 16 */
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)   /* nb of entries in chainTable : 131072 */
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)   /* bit mask matching LZ4HC_MAXD (a power of 2) */
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)   /* nb of entries in hashTable : 32768 */
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)   /* bit mask matching LZ4HC_HASHTABLESIZE (a power of 2) */
+
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#include <stdint.h>
+
+typedef struct
+{
+    uint32_t   hashTable[LZ4HC_HASHTABLESIZE];   /* hash of a sequence => index of the latest position recorded for it */
+    uint16_t   chainTable[LZ4HC_MAXD];           /* 16-bit deltas linking a position to earlier candidates */
+    const uint8_t* end;         /* next block here to continue on current prefix */
+    const uint8_t* base;        /* All index relative to this position */
+    const uint8_t* dictBase;    /* alternate base for extDict */
+    uint8_t* inputBuffer;       /* deprecated */
+    uint32_t   dictLimit;       /* below that point, need extDict */
+    uint32_t   lowLimit;        /* below that point, no more dict */
+    uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
+    uint32_t   searchNum;       /* only for optimal parser : max nb of candidates visited per search */
+    uint32_t   compressionLevel;   /* NOTE(review): declared as `int` in the non-C99 variant of this struct */
+} LZ4HC_CCtx_internal;
+
+#else
+
+typedef struct
+{
+    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];   /* hash of a sequence => index of the latest position recorded for it */
+    unsigned short chainTable[LZ4HC_MAXD];           /* 16-bit deltas linking a position to earlier candidates */
+    const unsigned char* end;        /* next block here to continue on current prefix */
+    const unsigned char* base;       /* All index relative to this position */
+    const unsigned char* dictBase;   /* alternate base for extDict */
+    unsigned char* inputBuffer;      /* deprecated */
+    unsigned int   dictLimit;        /* below that point, need extDict */
+    unsigned int   lowLimit;         /* below that point, no more dict */
+    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
+    unsigned int   searchNum;        /* only for optimal parser : max nb of candidates visited per search */
+    int            compressionLevel; /* NOTE(review): declared as `uint32_t` in the C99 variant of this struct */
+} LZ4HC_CCtx_internal;
 
-To know how much space must be allocated, use LZ4_sizeofStreamStateHC();
-Note also that space must be aligned for pointers (32 or 64 bits).
+#endif
 
-Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
-void* state is a pointer to the space allocated.
-It must be aligned for pointers (32 or 64 bits), and be large enough.
-The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
+#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 4*32768 + 2*131072 + 56 = 393272 */
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+union LZ4_streamHC_u {
+    size_t table[LZ4_STREAMHCSIZE_SIZET];   /* raw storage, expressed in size_t units */
+    LZ4HC_CCtx_internal internal_donotuse;
+};   /* previously typedef'd to LZ4_streamHC_t */
+/*
+  LZ4_streamHC_t :
+  This structure allows static allocation of LZ4 HC streaming state.
+  State must be initialized using LZ4_resetStreamHC() before first use.
 
-The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState().
-return value of LZ4_resetStreamStateHC() must be 0 is OK.
-Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)).
+  Static allocation shall only be used in combination with static linking.
+  When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
 */
 
 
+/*-************************************
+*  Deprecated Functions
+**************************************/
+/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
+
+/* deprecated compression functions : the LZ4_DEPRECATED() message of each one names its replacement */
+/* these functions will trigger warning messages in future releases */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC               (const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Deprecated streaming functions, using the older model (state passed as `void*`) ; should no longer be used */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
 #if defined (__cplusplus)
 }
 #endif
+
+#endif /* LZ4_HC_H_19834876238432 */
+
+/*-************************************************
+ * !!!!!     STATIC LINKING ONLY     !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ *************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY   /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
+ *  Will try to compress as much data from `src` as possible
+ *  that can fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ * `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`
+ */
+int LZ4_compress_HC_destSize(void* LZ4HC_Data,
+                            const char* src, char* dst,
+                            int* srcSizePtr, int targetDstSize,
+                            int compressionLevel);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
+ *  Similar to LZ4_compress_HC_continue(),
+ *  but will read a variable number of bytes from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ * `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
+ *  Important : due to limitations, this prototype only works well for cLevel < LZ4HC_CLEVEL_OPT_MIN ;
+ *              from that level upward, compression performance will be much reduced due to internal incompatibilities
+ */
+int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+                            const char* src, char* dst,
+                            int* srcSizePtr, int targetDstSize);
+
+/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
+ *  It's possible to change compression level after LZ4_resetStreamHC(), between 2 invocations of LZ4_compress_HC_continue*(),
+ *  but the new level must stay in the same mode as the previous one (aka 1-10 or 11-12).
+ *  This function ensures this condition.
+ */
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+
+
+#endif   /* LZ4_HC_SLO_098092834 */
+#endif   /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/lz4/lz4opt.h b/lz4/lz4opt.h
new file mode 100644
index 0000000..e9e54d8
--- /dev/null
+++ b/lz4/lz4opt.h
@@ -0,0 +1,366 @@
+/*
+    lz4opt.h - Optimal Mode of LZ4
+    Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
+    Note : this file is intended to be included within lz4hc.c
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/lz4/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#define LZ4_OPT_NUM   (1<<12)   /* the opt[] and matches[] work arrays of LZ4HC_compress_optimal() hold LZ4_OPT_NUM+1 entries each */
+
+
+typedef struct {
+    int off;   /* distance from the searched position back to the match */
+    int len;   /* length of the match */
+} LZ4HC_match_t;
+
+typedef struct {
+    int price;    /* cost in bytes recorded for this position ; 1<<30 until a real candidate is stored */
+    int off;      /* offset of the match recorded for this position ; 0 when the entry is a literal */
+    int mlen;     /* length of the match recorded for this position ; 1 denotes a literal */
+    int litlen;   /* nb of literals recorded with this entry */
+} LZ4HC_optimal_t;
+
+
+/* price in bytes : one per literal, plus the extra length bytes needed once the run reaches RUN_MASK */
+FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
+{
+    size_t const runMask = (size_t)RUN_MASK;
+    if (litlen < runMask)
+        return litlen;   /* short run : no extra length byte */
+    return litlen + 1 + (litlen-RUN_MASK)/255;
+}
+
+
+/* price in bytes of a whole sequence (literals + match) ; requires mlen >= MINMATCH */
+FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
+{
+    size_t const fixedCost = 1 /* token */ + 2 /* 16-bit offset */;
+    size_t const litCost = LZ4HC_literalsPrice(litlen);
+    size_t mlCost = 0;   /* extra bytes needed by a long match length */
+
+    if (mlen >= (size_t)(ML_MASK+MINMATCH)) {
+        mlCost = 1 + (mlen-(ML_MASK+MINMATCH))/255;
+    }
+    return fixedCost + litCost + mlCost;
+}
+
+
+/*-*************************************
+*  Binary Tree search : inserts `ip` into the tree ; each match longer than the best so far is stored into `matches` ; always returns 1
+***************************************/
+FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
+    LZ4HC_CCtx_internal* ctx,
+    const BYTE* const ip,
+    const BYTE* const iHighLimit,
+    size_t best_mlen,             /* only matches strictly longer than this are reported */
+    LZ4HC_match_t* matches,       /* may be NULL : the tree is still updated, nothing is reported */
+    int* matchNum)                /* may be NULL ; otherwise receives the nb of matches stored */
+{
+    U16* const chainTable = ctx->chainTable;   /* not named below : presumably consumed by the DELTANEXTMAXD() macro - TODO confirm */
+    U32* const HashTable = ctx->hashTable;
+    const BYTE* const base = ctx->base;
+    const U32 dictLimit = ctx->dictLimit;
+    const U32 current = (U32)(ip - base);
+    const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);   /* lowest candidate index accepted */
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* match;
+    int nbAttempts = ctx->searchNum;   /* max nb of candidates visited */
+    int mnum = 0;
+    U16 *ptr0, *ptr1, delta0, delta1;
+    U32 matchIndex;
+    size_t matchLength = 0;
+    U32* HashPos;
+
+    if (ip + MINMATCH > iHighLimit) return 1;   /* too close to the limit : position is not inserted */
+
+    /* HC4 match finder */
+    HashPos = &HashTable[LZ4HC_hashPtr(ip)];
+    matchIndex = *HashPos;   /* first candidate : previous position recorded for this hash */
+    *HashPos = current;
+
+    ptr0 = &DELTANEXTMAXD(current*2+1);   /* each position owns 2 consecutive slots */
+    ptr1 = &DELTANEXTMAXD(current*2);
+    delta0 = delta1 = (U16)(current - matchIndex);
+
+    while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
+        nbAttempts--;
+        if (matchIndex >= dictLimit) {
+            match = base + matchIndex;
+            matchLength = LZ4_count(ip, match, iHighLimit);
+        } else {
+            /* candidate starts below dictLimit : compare through dictBase, then continue from base+dictLimit */
+            const BYTE* vLimit = ip + (dictLimit - matchIndex);
+            match = dictBase + matchIndex;
+            if (vLimit > iHighLimit) vLimit = iHighLimit;
+            matchLength = LZ4_count(ip, match, vLimit);
+            if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
+                matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > best_mlen) {
+            best_mlen = matchLength;   /* hence matches[] is filled in increasing length order */
+            if (matches) {
+                if (matchIndex >= dictLimit)
+                    matches[mnum].off = (int)(ip - match);
+                else
+                    matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
+                matches[mnum].len = (int)matchLength;
+                mnum++;
+            }
+            if (best_mlen > LZ4_OPT_NUM) break;
+        }
+
+        if (ip+matchLength >= iHighLimit)   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
+
+        DEBUGLOG(6, "ip   :%016llX", (U64)ip);
+        DEBUGLOG(6, "match:%016llX", (U64)match);
+        if (*(ip+matchLength) < *(match+matchLength)) {   /* first differing byte selects the side to continue on */
+            *ptr0 = delta0;
+            ptr0 = &DELTANEXTMAXD(matchIndex*2);
+            if (*ptr0 == (U16)-1) break;   /* (U16)-1 : no further candidate on this side */
+            delta0 = *ptr0;
+            delta1 += delta0;
+            matchIndex -= delta0;
+        } else {
+            *ptr1 = delta1;
+            ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
+            if (*ptr1 == (U16)-1) break;   /* (U16)-1 : no further candidate on this side */
+            delta1 = *ptr1;
+            delta0 += delta1;
+            matchIndex -= delta1;
+        }
+    }
+
+    *ptr0 = (U16)-1;   /* terminate both pending links */
+    *ptr1 = (U16)-1;
+    if (matchNum) *matchNum = mnum;
+  /*  if (best_mlen > 8) return best_mlen-8; */
+    if (!matchNum) return 1;   /* same value as the final return */
+    return 1;
+}
+
+
+FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
+{
+    const BYTE* const origin = ctx->base;
+    U32 const stopIdx = (U32)(ip - origin);
+    U32 pos;   /* walks the positions from ctx->nextToUpdate up to (excluding) ip, inserting each one */
+    for (pos = ctx->nextToUpdate; pos < stopIdx; )
+        pos += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, origin+pos, iHighLimit, 8, NULL, NULL);
+}
+
+
+/** Inserts `ip` into the tree and reports the matches found there ; @return : nb of entries written into `matches` */
+FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
+                        LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit,
+                        size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
+{
+    int nbFound = 0;
+    if (ip >= ctx->base + ctx->nextToUpdate) {   /* otherwise : skipped area, nothing to report */
+        size_t advance;
+        if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
+        advance = (size_t)LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &nbFound);
+        ctx->nextToUpdate = (U32)(ip - ctx->base + advance);
+    }
+    return nbFound;
+}
+
+
+#define SET_PRICE(pos, ml, offset, ll, cost)   /* stores a candidate into opt[pos] ; uses `opt` and `last_pos` of the calling scope ; `pos` is evaluated several times */ \
+do {   /* do/while(0) : expands to a single statement, safe inside an unbraced if/else */ \
+    while (last_pos < (pos))  { opt[last_pos+1].price = 1<<30; last_pos++; }   /* newly reached slots start with a huge price */ \
+    opt[pos].mlen = (int)(ml);                         \
+    opt[pos].off = (int)(offset);                      \
+    opt[pos].litlen = (int)(ll);                       \
+    opt[pos].price = (int)(cost);                      \
+} while (0)
+
+
+static int LZ4HC_compress_optimal (   /* @return : nb of bytes written into `dest`, or 0 when encoding a sequence fails or the last literals exceed the output limit */
+    LZ4HC_CCtx_internal* ctx,
+    const char* const source,
+    char* dest,
+    int inputSize,
+    int maxOutputSize,
+    limitedOutput_directive limit,
+    size_t sufficient_len,              /* a match longer than this is encoded immediately */
+    const int fullUpdate                /* forwarded to LZ4HC_BinTree_GetAllMatches() */
+    )
+{
+    LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1];   /* this uses a bit too much stack memory to my taste ... */
+    LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    /* init */
+    DEBUGLOG(5, "LZ4HC_compress_optimal");
+    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+    ctx->end += inputSize;
+    ip++;   /* first byte is never searched : it stays in the pending literals (anchor is unchanged) */
+
+    /* Main Loop */
+    while (ip < mflimit) {
+        size_t const llen = ip - anchor;   /* literals pending since the last encoded sequence */
+        size_t last_pos = 0;               /* farthest opt[] position initialized so far */
+        size_t match_num, cur, best_mlen, best_off;
+        memset(opt, 0, sizeof(LZ4HC_optimal_t));  /* memset only the first one */
+
+        match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
+        if (!match_num) { ip++; continue; }
+
+        if ((size_t)matches[match_num-1].len > sufficient_len) {   /* matches[] is in increasing length order : last one is the longest */
+            /* good enough solution : immediate encoding */
+            best_mlen = matches[match_num-1].len;
+            best_off = matches[match_num-1].off;
+            cur = 0;
+            last_pos = 1;
+            goto encode;
+        }
+
+        /* set prices using matches at position = 0 */
+        {   size_t matchNb;
+            for (matchNb = 0; matchNb < match_num; matchNb++) {
+                size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;   /* lengths up to the previous match are already priced */
+                best_mlen = matches[matchNb].len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
+                for ( ; mlen <= best_mlen ; mlen++) {
+                    size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen);
+                    SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost);   /* updates last_pos and opt[pos] */
+        }   }   }
+
+        if (last_pos < MINMATCH) { ip++; continue; }  /* note : on clang at least, this test improves performance */
+
+        /* check further positions */
+        opt[0].mlen = opt[1].mlen = 1;   /* mlen == 1 denotes a literal */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const curPtr = ip + cur;
+
+            /* establish baseline price if cur is literal */
+            {   size_t price, litlen;
+                if (opt[cur-1].mlen == 1) {
+                    /* no match at previous position */
+                    litlen = opt[cur-1].litlen + 1;
+                    if (cur > litlen) {
+                        price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
+                    } else {
+                        /* literal run reaches back to position 0 : it extends the llen pending literals */
+                        price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
+                    }
+                } else {
+                    litlen = 1;
+                    price = opt[cur - 1].price + LZ4HC_literalsPrice(1);
+                }
+
+                if (price < (size_t)opt[cur].price)
+                    SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price);   /* note : increases last_pos */
+            }
+
+            if (cur == last_pos || curPtr >= mflimit) break;
+
+            match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
+            if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) {
+                /* immediate encoding */
+                best_mlen = matches[match_num-1].len;
+                best_off = matches[match_num-1].off;
+                last_pos = cur + 1;
+                goto encode;
+            }
+
+            /* set prices using matches at position = cur */
+            {   size_t matchNb;
+                for (matchNb = 0; matchNb < match_num; matchNb++) {
+                    size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
+                    best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ?
+                                (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur;   /* clamp so that cur + ml stays within opt[] */
+
+                    for ( ; ml <= best_mlen ; ml++) {
+                        size_t ll, price;
+                        if (opt[cur].mlen == 1) {
+                            ll = opt[cur].litlen;
+                            if (cur > ll)
+                                price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml);
+                            else
+                                price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen);
+                        } else {
+                            ll = 0;
+                            price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+                        }
+
+                        if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) {
+                            SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price);
+            }   }   }   }
+        } /* for (cur = 1; cur <= last_pos; cur++) */
+
+        best_mlen = opt[last_pos].mlen;
+        best_off = opt[last_pos].off;
+        cur = last_pos - best_mlen;
+
+encode: /* cur, last_pos, best_mlen, best_off must be set */
+        opt[0].mlen = 1;
+        while (1) {  /* from end to beginning : move each selected (mlen, off) to the position where its step starts */
+            size_t const ml = opt[cur].mlen;
+            int const offset = opt[cur].off;
+            opt[cur].mlen = (int)best_mlen;
+            opt[cur].off = (int)best_off;
+            best_mlen = ml;
+            best_off = offset;
+            if (ml > cur) break;   /* can this happen ? */
+            cur -= ml;
+        }
+
+        /* encode all recorded sequences */
+        cur = 0;
+        while (cur < last_pos) {
+            int const ml = opt[cur].mlen;
+            int const offset = opt[cur].off;
+            if (ml == 1) { ip++; cur++; continue; }   /* literal : just advance, anchor stays behind */
+            cur += ml;
+            if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0;
+        }
+    }  /* while (ip < mflimit) */
+
+    /* Encode Last Literals */
+    {   int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) ((char*)op-dest);
+}
diff --git a/lz4/xxhash.c b/lz4/xxhash.c
index 8304ec2..a532358 100644
--- a/lz4/xxhash.c
+++ b/lz4/xxhash.c
@@ -1,252 +1,313 @@
 /*
-xxHash - Fast Hash algorithm
-Copyright (C) 2012-2014, Yann Collet.
-BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-You can contact the author at :
-- xxHash source repository : http://code.google.com/p/xxhash/
+*  xxHash - Fast Hash algorithm
+*  Copyright (C) 2012-2016, Yann Collet
+*
+*  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions are
+*  met:
+*
+*  * Redistributions of source code must retain the above copyright
+*  notice, this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above
+*  copyright notice, this list of conditions and the following disclaimer
+*  in the documentation and/or other materials provided with the
+*  distribution.
+*
+*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*  You can contact the author at :
+*  - xxHash homepage: http://www.xxhash.com
+*  - xxHash source repository : https://github.com/Cyan4973/xxHash
 */
 
 
-//**************************************
-// Tuning parameters
-//**************************************
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
-// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
-#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-#  define XXH_USE_UNALIGNED_ACCESS 1
+/* *************************************
+*  Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
 #endif
 
-// XXH_ACCEPT_NULL_INPUT_POINTER :
-// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
-// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
-// This option has a very small performance cost (only measurable on small inputs).
-// By default, this option is disabled. To enable it, uncomment below define :
-//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
-
-// XXH_FORCE_NATIVE_FORMAT :
-// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
-// Results are therefore identical for little-endian and big-endian CPU.
-// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
-// Should endian-independance be of no importance for your application, you may set the #define below to 1.
-// It will improve speed for Big-endian CPU.
-// This option has no impact on Little_Endian CPU.
-#define XXH_FORCE_NATIVE_FORMAT 0
-
-
-//**************************************
-// Compiler Specific Options
-//**************************************
-// Disable some Visual warning messages
-#ifdef _MSC_VER  // Visual Studio
-#  pragma warning(disable : 4127)      // disable: C4127: conditional expression is constant
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
+#  define XXH_FORCE_NATIVE_FORMAT 0
 #endif
 
-#ifdef _MSC_VER    // Visual Studio
-#  define FORCE_INLINE static __forceinline
-#else 
-#  ifdef __GNUC__
-#    define FORCE_INLINE static inline __attribute__((always_inline))
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash; set to 0 when the input data
+ * is guaranteed to be aligned.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define XXH_FORCE_ALIGN_CHECK 0
 #  else
-#    define FORCE_INLINE static inline
+#    define XXH_FORCE_ALIGN_CHECK 1
 #  endif
 #endif
 
 
-//**************************************
-// Includes & Memory related functions
-//**************************************
-#include "xxhash.h"
-// Modify the local functions below should you wish to use some other memory related routines
-// for malloc(), free()
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+*   for malloc(), free() */
 #include <stdlib.h>
-FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); }
-FORCE_INLINE void  XXH_free  (void* p)  { free(p); }
-// for memcpy()
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/*! and for memcpy() */
 #include <string.h>
-FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
-# include <stdint.h>
-  typedef uint8_t  BYTE;
-  typedef uint16_t U16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-#else
-  typedef unsigned char      BYTE;
-  typedef unsigned short     U16;
-  typedef unsigned int       U32;
-  typedef   signed int       S32;
-  typedef unsigned long long U64;
-#endif
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
-#if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
-#  define _PACKED __attribute__ ((packed))
-#else
-#  define _PACKED
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
 #endif
 
-#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  ifdef __IBMC__
-#    pragma pack(1)
+#ifndef FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define FORCE_INLINE static __forceinline
 #  else
-#    pragma pack(push, 1)
-#  endif
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define FORCE_INLINE static inline
+#      endif
+#    else
+#      define FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* FORCE_INLINE */
+
+
+/* *************************************
+*  Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint8_t  BYTE;
+    typedef uint16_t U16;
+    typedef uint32_t U32;
+    typedef  int32_t S32;
+# else
+    typedef unsigned char      BYTE;
+    typedef unsigned short     U16;
+    typedef unsigned int       U32;
+    typedef   signed int       S32;
+# endif
 #endif
 
-typedef struct _U32_S { U32 v; } _PACKED U32_S;
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
 
-#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  pragma pack(pop)
-#endif
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
 
-#define A32(x) (((U32_S *)(x))->v)
+/* Portable and safe fallback: copy the bytes out with memcpy(). Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 word;
+    memcpy(&word, memPtr, sizeof(U32));
+    return word;
+}
 
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
 
-//***************************************
-// Compiler-specific Functions and Macros
-//***************************************
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 
-// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
 #if defined(_MSC_VER)
 #  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
 #else
 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
 #endif
 
-#if defined(_MSC_VER)     // Visual Studio
+#if defined(_MSC_VER)     /* Visual Studio */
 #  define XXH_swap32 _byteswap_ulong
-#elif GCC_VERSION >= 403
+#elif XXH_GCC_VERSION >= 403
 #  define XXH_swap32 __builtin_bswap32
 #else
-static inline U32 XXH_swap32 (U32 x) {
+static U32 XXH_swap32 (U32 x)
+{
     return  ((x << 24) & 0xff000000 ) |
-        ((x <<  8) & 0x00ff0000 ) |
-        ((x >>  8) & 0x0000ff00 ) |
-        ((x >> 24) & 0x000000ff );}
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
 #endif
 
 
-//**************************************
-// Constants
-//**************************************
-#define PRIME32_1   2654435761U
-#define PRIME32_2   2246822519U
-#define PRIME32_3   3266489917U
-#define PRIME32_4    668265263U
-#define PRIME32_5    374761393U
-
-
-//**************************************
-// Architecture Macros
-//**************************************
+/* *************************************
+*  Architecture Macros
+***************************************/
 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
-#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
-    static const int one = 1;
-#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
-#endif
-
 
-//**************************************
-// Macros
-//**************************************
-#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    static const int g_one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
+#endif
 
 
-//****************************
-// Memory reads
-//****************************
+/* ***************************
+*  Memory reads
+*****************************/
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
-{ 
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
     if (align==XXH_unaligned)
-        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); 
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
     else
-        return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); 
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
 }
 
-FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
+static U32 XXH_readBE32(const void* ptr)   /* 32-bit load, big-endian interpretation */
+{   U32 const raw = XXH_read32(ptr);       /* native-order load; swapped on little-endian CPU */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(raw) : raw;
+}
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 
 
-//****************************
-// Simple Hash Functions
-//****************************
-FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align)
+/* *******************************************************************
+*  32-bits hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 =  668265263U;
+static const U32 PRIME32_5 =  374761393U;
+
+/* One xxHash32 round: fold `input` into the running lane value `seed`. */
+static U32 XXH32_round(U32 seed, U32 input)
+{
+    U32 lane = seed + input * PRIME32_2;
+    lane = XXH_rotl32(lane, 13);
+    return lane * PRIME32_1;
+}
+
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
-    const BYTE* const bEnd = p + len;
+    const BYTE* bEnd = p + len;
     U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
 
 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
-    if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; }
+    if (p==NULL) {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
 #endif
 
-    if (len>=16)
-    {
+    if (len>=16) {
         const BYTE* const limit = bEnd - 16;
         U32 v1 = seed + PRIME32_1 + PRIME32_2;
         U32 v2 = seed + PRIME32_2;
         U32 v3 = seed + 0;
         U32 v4 = seed - PRIME32_1;
 
-        do
-        {
-            v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
-            v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
-            v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
-            v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
+        do {
+            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
         } while (p<=limit);
 
         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
-    }
-    else
-    {
+    } else {
         h32  = seed + PRIME32_5;
     }
 
     h32 += (U32) len;
 
-    while (p<=bEnd-4)
-    {
-        h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
+    while (p+4<=bEnd) {
+        h32 += XXH_get32bits(p) * PRIME32_3;
         h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
         p+=4;
     }
 
-    while (p<bEnd)
-    {
+    while (p<bEnd) {
         h32 += (*p) * PRIME32_5;
         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
         p++;
@@ -262,25 +323,24 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_en
 }
 
 
-U32 XXH32(const void* input, int len, U32 seed)
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
 {
 #if 0
-    // Simple version, good for code maintenance, but unfortunately slow for small inputs
-    void* state = XXH32_init(seed);
-    XXH32_update(state, input, len);
-    return XXH32_digest(state);
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
 #else
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
-#  if !defined(XXH_USE_UNALIGNED_ACCESS)
-    if ((((size_t)input) & 3))   // Input is aligned, let's leverage the speed advantage
-    {
-        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
-            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
-        else
-            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
-    }
-#  endif
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }
 
     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
         return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
@@ -290,55 +350,39 @@ U32 XXH32(const void* input, int len, U32 seed)
 }
 
 
-//****************************
-// Advanced Hash Functions
-//****************************
-
-struct XXH_state32_t
-{
-    U64 total_len;
-    U32 seed;
-    U32 v1;
-    U32 v2;
-    U32 v3;
-    U32 v4;
-    int memsize;
-    char memory[16];
-};
 
+/*======   Hash streaming   ======*/
 
-int XXH32_sizeofState() 
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
 {
-    XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
-    return sizeof(struct XXH_state32_t); 
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
 }
-
-
-XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
-{ 
-    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
-    state->seed = seed;
-    state->v1 = seed + PRIME32_1 + PRIME32_2;
-    state->v2 = seed + PRIME32_2;
-    state->v3 = seed + 0;
-    state->v4 = seed - PRIME32_1;
-    state->total_len = 0;
-    state->memsize = 0;
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
     return XXH_OK;
 }
 
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{   /* Duplicate a streaming state: byte-copies *srcState into *dstState. */
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
 
-void* XXH32_init (U32 seed)
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
 {
-    void* state = XXH_malloc (sizeof(struct XXH_state32_t));
-    XXH32_resetState(state, seed);
-    return state;
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */
+    state.v1 = seed + PRIME32_1 + PRIME32_2;
+    state.v2 = seed + PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME32_1;
+    memcpy(statePtr, &state, sizeof(state));
+    return XXH_OK;
 }
 
 
-FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
-    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
 
@@ -346,43 +390,39 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu
     if (input==NULL) return XXH_ERROR;
 #endif
 
-    state->total_len += len;
+    state->total_len_32 += (unsigned)len;
+    state->large_len |= (len>=16) | (state->total_len_32>=16);
 
-    if (state->memsize + len < 16)   // fill in tmp buffer
-    {
-        XXH_memcpy(state->memory + state->memsize, input, len);
-        state->memsize +=  len;
+    if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (unsigned)len;
         return XXH_OK;
     }
 
-    if (state->memsize)   // some data left from previous update
-    {
-        XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
-        {
-            const U32* p32 = (const U32*)state->memory;
-            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
-            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; 
-            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
-            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
+    if (state->memsize) {   /* some data left from previous update */
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {   const U32* p32 = state->mem32;
+            state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+            state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+            state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+            state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
         }
         p += 16-state->memsize;
         state->memsize = 0;
     }
 
-    if (p <= bEnd-16)
-    {
+    if (p <= bEnd-16) {
         const BYTE* const limit = bEnd - 16;
         U32 v1 = state->v1;
         U32 v2 = state->v2;
         U32 v3 = state->v3;
         U32 v4 = state->v4;
 
-        do
-        {
-            v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
-            v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
-            v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
-            v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
+        do {
+            v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+            v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+            v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+            v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
         } while (p<=limit);
 
         state->v1 = v1;
@@ -391,19 +431,18 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu
         state->v4 = v4;
     }
 
-    if (p < bEnd)
-    {
-        XXH_memcpy(state->memory, p, bEnd-p);
-        state->memsize = (int)(bEnd-p);
+    if (p < bEnd) {
+        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+        state->memsize = (unsigned)(bEnd-p);
     }
 
     return XXH_OK;
 }
 
-XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-    
+
     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
         return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
     else
@@ -412,35 +451,29 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
 
 
 
-FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
-    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
-    const BYTE * p = (const BYTE*)state->memory;
-    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
     U32 h32;
 
-    if (state->total_len >= 16)
-    {
+    if (state->large_len) {
         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
-    }
-    else
-    {
-        h32  = state->seed + PRIME32_5;
+    } else {
+        h32 = state->v3 /* == seed */ + PRIME32_5;
     }
 
-    h32 += (U32) state->total_len;
+    h32 += state->total_len_32;
 
-    while (p<=bEnd-4)
-    {
-        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
+    while (p+4<=bEnd) {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
         h32  = XXH_rotl32(h32, 17) * PRIME32_4;
         p+=4;
     }
 
-    while (p<bEnd)
-    {
+    while (p<bEnd) {
         h32 += (*p) * PRIME32_5;
-        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        h32  = XXH_rotl32(h32, 11) * PRIME32_1;
         p++;
     }
 
@@ -454,22 +487,408 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess
 }
 
 
-U32 XXH32_intermediateDigest (void* state_in)
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-    
+
     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
-        return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
     else
-        return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
 }
 
 
-U32 XXH32_digest (void* state_in)
+/*======   Canonical representation   ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+*   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+*   These functions allow transformation of hash result into and from its canonical format.
+*   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
 {
-    U32 h32 = XXH32_intermediateDigest(state_in);
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+    memcpy(dst, &hash, sizeof(*dst));
+}
 
-    XXH_free(state_in);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{   /* Converts a canonical (big-endian) digest back into a hash value by reading src through XXH_readBE32(). */
+    return XXH_readBE32(src);
+}
 
-    return h32;
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+*  64-bits hash functions
+*********************************************************************/
+
+/*======   Memory access   ======*/
+
+#ifndef MEM_MODULE   /* skip the U64 typedef when MEM_MODULE is already defined */
+# define MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint64_t U64;   /* C++ or C99 and later, when __VMS is not defined: use the exact-width type */
+# else
+    typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
+# endif
+#endif   /* MEM_MODULE */
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc; the read goes through a packed union member */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
+
+#else
+
+/* Portable and safe solution: copies the 8 bytes into a local U64 with memcpy(). Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
 }
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403   /* use the compiler builtin */
+#  define XXH_swap64 __builtin_bswap64
+#else   /* portable fallback: reverses the 8 bytes of x with shifts and masks */
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif   /* XXH_swap64 */
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{   /* Loads 8 bytes at ptr; the value is byte-swapped unless endian is XXH_littleEndian. */
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));   /* goes through XXH_read64() */
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);   /* direct dereference: ptr must be suitably aligned for U64 */
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{   /* Convenience wrapper: same as XXH_readLE64_align() with align fixed to XXH_unaligned. */
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{   /* Loads 8 bytes at ptr as a big-endian value: byte-swapped when XXH_CPU_LITTLE_ENDIAN is non-zero, returned as read otherwise. */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*======   xxh64   ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;   /* 64-bit mixing constants used by the XXH64 rounds and finalization below */
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 =  1609587929392839161ULL;
+static const U64 PRIME64_4 =  9650029242287828579ULL;
+static const U64 PRIME64_5 =  2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input)
+{   /* One accumulator step: folds input into acc (multiply-add, rotate left by 31, multiply) and returns the new accumulator. */
+    acc += input * PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= PRIME64_1;
+    return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{   /* Folds one lane accumulator val into the combined hash acc and returns the result. */
+    val  = XXH64_round(0, val);   /* scramble the lane, starting from a zero accumulator */
+    acc ^= val;
+    acc  = acc * PRIME64_1 + PRIME64_4;
+    return acc;
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{   /* One-shot XXH64 of the len bytes at input, starting from seed; endian and align select how words are loaded. Returns the 64-bit hash. */
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;   /* deliberately not const-qualified: the NULL-input branch below reassigns it */
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL) {   /* treat a NULL input as an empty one */
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;   /* any non-NULL value works: p==bEnd, so nothing is read */
+    }
+#endif
+
+    if (len>=32) {   /* bulk phase: four accumulators consume 32 bytes per iteration */
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+        } while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {   /* short input: no accumulators, start directly from the seed */
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd) {   /* remaining whole 8-byte words */
+        U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+        h64 ^= k1;
+        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd) {   /* at most one remaining 4-byte word */
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd) {   /* remaining single bytes */
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;   /* final mixing of the bits */
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{   /* Public one-shot entry point: picks the endianness/alignment variant of XXH64_endian_align() and returns its result. */
+#if 0   /* disabled reference implementation built on the streaming API */
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input address is a multiple of 8, let's leverage the speed advantage */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }   /* closes both the address test and the XXH_FORCE_ALIGN_CHECK test */
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif   /* 0 */
+}
+
+/*======   Hash Streaming   ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{   /* Allocates one XXH64_state_t with XXH_malloc() and returns it as-is: the result is not checked and the state is not initialized here. */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{   /* Releases statePtr through XXH_free(); always returns XXH_OK. */
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{   /* Byte-copies the whole state *srcState into *dstState with memcpy(), so the two objects must not overlap. */
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{   /* (Re)starts a streaming hash: clears *statePtr and seeds the four accumulators from seed. Always returns XXH_OK. */
+    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));   /* zero the whole local, reserved bytes and padding included, so the memcpy() below never copies uninitialized stack bytes */
+    state.v1 = seed + PRIME64_1 + PRIME64_2;
+    state.v2 = seed + PRIME64_2;
+    state.v3 = seed + 0;   /* v3 keeps the raw seed; XXH64_digest_endian() reads it back for inputs shorter than 32 bytes */
+    state.v4 = seed - PRIME64_1;
+    memcpy(statePtr, &state, sizeof(state));
+    return XXH_OK;
+}
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{   /* Feeds len bytes at input into the streaming state: whole 32-byte stripes are consumed, the remainder is buffered in state->mem64. */
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;   /* the only failure path, and only compiled in with this macro */
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32) {  /* not enough for a full stripe yet: just append to the tmp buffer */
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize) {   /* tmp buffer holds leftover bytes: top it up to 32 bytes and consume it */
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+        p += 32-state->memsize;   /* skip the input bytes that were used to complete the buffer */
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd) {   /* consume every whole 32-byte stripe directly from the input */
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do {
+            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd) {   /* fewer than 32 bytes remain: keep them for the next update or the digest */
+        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+        state->memsize = (unsigned)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{   /* Public streaming update: forwards to XXH64_update_endian() with the byte order selected below and returns its error code. */
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   /* little-endian CPU, or native format forced */
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{   /* Computes the hash of everything fed so far; the state is only read, never modified. */
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;   /* end of the buffered, not yet consumed bytes */
+    U64 h64;
+
+    if (state->total_len >= 32) {   /* at least one stripe went through the accumulators: merge them */
+        U64 const v1 = state->v1;
+        U64 const v2 = state->v2;
+        U64 const v3 = state->v3;
+        U64 const v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64  = state->v3 + PRIME64_5;   /* v3 still holds the value XXH64_reset() derived as seed + 0 */
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd) {   /* buffered whole 8-byte words */
+        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+        h64 ^= k1;
+        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd) {   /* at most one buffered 4-byte word */
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd) {   /* buffered single bytes */
+        h64 ^= (*p) * PRIME64_5;
+        h64  = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;   /* final mixing of the bits */
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{   /* Public streaming digest: forwards to XXH64_digest_endian() with the byte order selected below and returns the hash. */
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   /* little-endian CPU, or native format forced */
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation   ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{   /* Stores hash into *dst in big-endian byte order. */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));   /* the memcpy() below relies on both types having the same size */
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);   /* little-endian CPU: reverse the bytes so the most significant byte is written first */
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{   /* Inverse of XXH64_canonicalFromHash(): reads the 8 bytes at src as a big-endian value and returns it. */
+    return XXH_readBE64(src);
+}
+
+#endif  /* XXH_NO_LONG_LONG */
diff --git a/lz4/xxhash.h b/lz4/xxhash.h
index a319bcc..870a6d9 100644
--- a/lz4/xxhash.h
+++ b/lz4/xxhash.h
@@ -1,20 +1,21 @@
 /*
-   xxHash - Fast Hash algorithm
+   xxHash - Extremely Fast Hash algorithm
    Header File
-   Copyright (C) 2012-2014, Yann Collet.
+   Copyright (C) 2012-2016, Yann Collet.
+
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:
-  
+
        * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
        * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following disclaimer
    in the documentation and/or other materials provided with the
    distribution.
-  
+
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -28,7 +29,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - xxHash source repository : http://code.google.com/p/xxhash/
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
 */
 
 /* Notice extracted from xxHash homepage :
@@ -52,113 +53,241 @@ CRC32           0.43 GB/s     9
 MD5-32          0.33 GB/s    10       Ronald L. Rivest
 SHA1-32         0.28 GB/s    10
 
-Q.Score is a measure of quality of the hash function. 
-It depends on successfully passing SMHasher test set. 
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
 10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
 */
 
-#pragma once
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
 
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
 
-//****************************
-// Type
-//****************************
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 
+/* ****************************
+*  API modifier
+******************************/
+/** XXH_PRIVATE_API
+*   This is useful to include xxhash functions in `static` mode
+*   in order to inline them, and remove their symbol from the public list.
+*   Methodology :
+*     #define XXH_PRIVATE_API
+*     #include "xxhash.h"
+*   `xxhash.c` is automatically included.
+*   It's not useful to compile and link it as a separate module.
+*/
+#ifdef XXH_PRIVATE_API
+#  ifndef XXH_STATIC_LINKING_ONLY
+#    define XXH_STATIC_LINKING_ONLY   /* private mode needs the section guarded by this macro: state struct definitions and the xxhash.c include */
+#  endif
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
+#  endif   /* compiler-specific `static inline` spelling */
+#else
+#  define XXH_PUBLIC_API   /* expands to nothing: functions keep their regular external linkage */
+#endif /* XXH_PRIVATE_API */
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with other libraries which may also include xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+
+Note that no change is required within the calling program as long as it includes `xxhash.h` :
+regular symbol name will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)   /* extra level of expansion so that XXH_NAMESPACE is replaced by its value before token pasting */
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif /* XXH_NAMESPACE */
 
-//****************************
-// Simple Hash Functions
-//****************************
 
-unsigned int XXH32 (const void* input, int len, unsigned int seed);
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    6
+#define XXH_VERSION_RELEASE  2
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
-/*
-XXH32() :
-    Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
-    The memory between input & input+len must be valid (allocated and read-accessible).
-    "seed" can be used to alter the result predictably.
-    This function successfully passes all SMHasher tests.
-    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
-    Note that "len" is type "int", which means it is limited to 2^31-1.
-    If your data is larger, use the advanced functions below.
-*/
 
+/*-**********************************************************************
+*  32-bits hash
+************************************************************************/
+typedef unsigned int       XXH32_hash_t;
 
+/*! XXH32() :
+    Calculate the 32-bits hash of a sequence of "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
 
-//****************************
-// Advanced Hash Functions
-//****************************
+/*======   Streaming   ======*/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
 
-void*         XXH32_init   (unsigned int seed);
-XXH_errorcode XXH32_update (void* state, const void* input, int len);
-unsigned int  XXH32_digest (void* state);
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
 
 /*
-These functions calculate the xxhash of an input provided in several small packets,
-as opposed to an input provided as a single block.
+These functions generate the xxHash of an input provided in multiple segments.
+Note that, for small input, they are slower than single-call functions, due to state management.
+For small input, prefer `XXH32()` and `XXH64()` .
 
-It must be started with :
-void* XXH32_init()
-The function returns a pointer which holds the state of calculation.
-
-This pointer must be provided as "void* state" parameter for XXH32_update().
-XXH32_update() can be called as many times as necessary.
-The user must provide a valid (allocated) input.
-The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
-Note that "len" is type "int", which means it is limited to 2^31-1. 
-If your data is larger, it is recommended to chunk your data into blocks 
-of size for example 2^30 (1GB) to avoid any "int" overflow issue.
-
-Finally, you can end the calculation anytime, by using XXH32_digest().
-This function returns the final 32-bits hash.
-You must provide the same "void* state" parameter created by XXH32_init().
-Memory will be freed by XXH32_digest().
-*/
+XXH state must first be allocated, using XXH*_createState() .
 
+Start a new hash by initializing state with a seed, using XXH*_reset().
 
-int           XXH32_sizeofState();
-XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
+Then, feed the hash state by calling XXH*_update() as many times as necessary.
+Obviously, input must be allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
 
-#define       XXH32_SIZEOFSTATE 48
-typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
-/*
-These functions allow user application to make its own allocation for state.
+Finally, a hash value can be produced anytime, by using XXH*_digest().
+This function returns the nn-bits hash as an unsigned int or unsigned long long.
 
-XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
-Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
-This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
+It's still possible to continue inserting input into the hash state after a digest,
+and generate some new hashes later on, by calling again XXH*_digest().
 
-For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
-use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
+When done, free XXH state space if it was allocated dynamically.
 */
 
+/*======   Canonical representation   ======*/
 
-unsigned int XXH32_intermediateDigest (void* state);
-/*
-This function does the same as XXH32_digest(), generating a 32-bit hash,
-but preserve memory context.
-This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
-To free memory context, use XXH32_digest(), or free().
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* Default result types for XXH functions are primitive unsigned 32 and 64 bits.
+*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+*  These functions allow transformation of hash result into and from its canonical format.
+*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
 */
 
 
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bits hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
 
-//****************************
-// Deprecated function names
-//****************************
-// The following translations are provided to ease code transition
-// You are encouraged to no longer this function names
-#define XXH32_feed   XXH32_update
-#define XXH32_result XXH32_digest
-#define XXH32_getIntermediateResult XXH32_intermediateDigest
+/*! XXH64() :
+    Calculate the 64-bits hash of a sequence of "length" bytes stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*======   Streaming   ======*/
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+
+/*======   Canonical representation   ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif  /* XXH_NO_LONG_LONG */
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+   This section contains definitions which are not guaranteed to remain stable.
+   They may change in future versions, becoming incompatible with a different version of the library.
+   They shall only be used with static linking.
+   Never use these definitions in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only meant to allow allocation of XXH state
+   statically, on stack, or in a struct for example.
+   Do not use members directly. */
+
+   struct XXH32_state_s {
+       unsigned total_len_32;   /* added to the hash by XXH32_digest() */
+       unsigned large_len;      /* non-zero: digest merges v1..v4; zero: digest starts from v3 */
+       unsigned v1;
+       unsigned v2;
+       unsigned v3;             /* equals the seed as long as large_len is zero */
+       unsigned v4;
+       unsigned mem32[4];   /* buffer defined as U32 for alignment */
+       unsigned memsize;        /* number of bytes currently buffered in mem32 */
+       unsigned reserved;   /* never read nor write, will be removed in a future version */
+   };   /* typedef'd to XXH32_state_t */
+
+#ifndef XXH_NO_LONG_LONG
+   struct XXH64_state_s {
+       unsigned long long total_len;   /* sum of every len passed to XXH64_update() since the last reset */
+       unsigned long long v1;          /* v1..v4: the four accumulators, seeded by XXH64_reset() */
+       unsigned long long v2;
+       unsigned long long v3;          /* set to the seed by XXH64_reset(); read back by the digest when total_len < 32 */
+       unsigned long long v4;
+       unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
+       unsigned memsize;               /* number of bytes currently buffered in mem64 */
+       unsigned reserved[2];          /* never read nor write, will be removed in a future version */
+   };   /* typedef'd to XXH64_state_t */
+#endif
 
+#  ifdef XXH_PRIVATE_API
+#    include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
+#  endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
 
 
 #if defined (__cplusplus)
 }
 #endif
+
+#endif /* XXHASH_H_5627135585666179 */