
Update zstd and lz4 versions

Mike Smith authored on 29/11/2020 12:26:04
Showing 97 changed files

... ...
@@ -2,6 +2,7 @@ on:
   push:
     branches:
       - master
+      - blosc-update
   pull_request:
     branches:
       - master
... ...
@@ -2,7 +2,7 @@ on:
   push:
     branches:
       - master
-      - arch-checks
+      - blosc-update
   pull_request:
     branches:
       - master
... ...
@@ -8,7 +8,7 @@ rm -f src/blosc/lib/blosc-*/*.o
 rm -f src/blosc/lib/snappy-*/*.o
 rm -f src/blosc/lib/lz4-*/*.o
 
-cd src/blosc/lib/zstd-1.3.8/
+cd src/blosc/lib/zstd-1.4.5/
 make clean
 
 rm -f src/blosc/libH5Zblosc.dll
\ No newline at end of file
... ...
@@ -9,17 +9,17 @@ BLOSC_OBJS=	@AVX2_OBJ_PATH@@AVX2_BITSHUFFLE_OBJ@ \
 			lib/blosc-1.20.1/fastcopy.o \
 			lib/blosc-1.20.1/shuffle-generic.o
 
-LZ4_OBJS=	lib/lz4-1.8.3/lz4.o \
-		 	lib/lz4-1.8.3/lz4hc.o
+LZ4_OBJS=	lib/lz4-1.9.2/lz4.o \
+		 	lib/lz4-1.9.2/lz4hc.o
 
 SNAPPY_OBJS=lib/snappy-1.1.1/snappy.o \
 			lib/snappy-1.1.1/snappy-c.o \
 			lib/snappy-1.1.1/snappy-sinksource.o \
 			lib/snappy-1.1.1/snappy-stubs-internal.o
 
-ZSTDCOMMON_FILES := $(sort $(wildcard lib/zstd-1.3.8/common/*.c))
-ZSTDCOMP_FILES := $(sort $(wildcard lib/zstd-1.3.8/compress/*.c))
-ZSTDDECOMP_FILES := $(sort $(wildcard lib/zstd-1.3.8/decompress/*.c))
+ZSTDCOMMON_FILES := $(sort $(wildcard lib/zstd-1.4.5/common/*.c))
+ZSTDCOMP_FILES := $(sort $(wildcard lib/zstd-1.4.5/compress/*.c))
+ZSTDDECOMP_FILES := $(sort $(wildcard lib/zstd-1.4.5/decompress/*.c))
 ZSTD_FILES := $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZSTDDECOMP_FILES)
 ZSTD_OBJS   := $(patsubst %.c,%.o,$(ZSTD_FILES))
 
... ...
@@ -44,7 +44,7 @@ libblosc.a:  blosc
 blosc: subdirs
 	$(MAKE) -C lib/blosc-1.20.1 all
 
-SUBDIRS = lib/snappy-1.1.1 lib/lz4-1.8.3 lib/zstd-1.3.8
+SUBDIRS = lib/snappy-1.1.1 lib/lz4-1.9.2 lib/zstd-1.4.5
 
 .PHONY: subdirs $(SUBDIRS)
 
... ...
@@ -58,5 +58,5 @@ clean:
 	rm -f libblosc.a
 	rm -f lib/blosc-1.20.1/*.o
 	rm -f lib/snappy-1.1.1/*.o
-	rm -f lib/lz4-1.8.3/*.o
-	$(MAKE) -C lib/zstd-1.3.8/ clean
+	rm -f lib/lz4-1.9.2/*.o
+	$(MAKE) -C lib/zstd-1.4.5/ clean
deleted file mode 100644
... ...
@@ -1,221 +0,0 @@
-# a simple way to detect that we are using CMAKE
-add_definitions(-DUSING_CMAKE)
-
-set(INTERNAL_LIBS ${PROJECT_SOURCE_DIR}/internal-complibs)
-
-# Hide symbols by default unless they're specifically exported.
-# This makes it easier to keep the set of exported symbols the
-# same across all compilers/platforms.
-set(CMAKE_C_VISIBILITY_PRESET hidden)
-
-# includes
-set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
-if(NOT DEACTIVATE_LZ4)
-    if (LZ4_FOUND)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${LZ4_INCLUDE_DIR})
-    else(LZ4_FOUND)
-        set(LZ4_LOCAL_DIR ${INTERNAL_LIBS}/lz4-1.8.3)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${LZ4_LOCAL_DIR})
-    endif(LZ4_FOUND)
-endif(NOT DEACTIVATE_LZ4)
-
-if(NOT DEACTIVATE_SNAPPY)
-    if (SNAPPY_FOUND)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${SNAPPY_INCLUDE_DIR})
-    else(SNAPPY_FOUND)
-        set(SNAPPY_LOCAL_DIR ${INTERNAL_LIBS}/snappy-1.1.1)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${SNAPPY_LOCAL_DIR})
-    endif(SNAPPY_FOUND)
-endif(NOT DEACTIVATE_SNAPPY)
-
-if(NOT DEACTIVATE_ZLIB)
-    if (ZLIB_FOUND)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIR})
-    else(ZLIB_FOUND)
-        set(ZLIB_LOCAL_DIR ${INTERNAL_LIBS}/zlib-1.2.8)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZLIB_LOCAL_DIR})
-    endif(ZLIB_FOUND)
-endif(NOT DEACTIVATE_ZLIB)
-
-if (NOT DEACTIVATE_ZSTD)
-    if (ZSTD_FOUND)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_INCLUDE_DIR})
-    else (ZSTD_FOUND)
-        set(ZSTD_LOCAL_DIR ${INTERNAL_LIBS}/zstd-1.3.8)
-        set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_LOCAL_DIR} ${ZSTD_LOCAL_DIR}/common)
-    endif (ZSTD_FOUND)
-endif (NOT DEACTIVATE_ZSTD)
-
-include_directories(${BLOSC_INCLUDE_DIRS})
-
-# library sources
-set(SOURCES blosc.c blosclz.c fastcopy.c shuffle-generic.c bitshuffle-generic.c
-        blosc-common.h blosc-export.h)
-if(COMPILER_SUPPORT_SSE2)
-    message(STATUS "Adding run-time support for SSE2")
-    set(SOURCES ${SOURCES} shuffle-sse2.c bitshuffle-sse2.c)
-endif(COMPILER_SUPPORT_SSE2)
-if(COMPILER_SUPPORT_AVX2)
-    message(STATUS "Adding run-time support for AVX2")
-    set(SOURCES ${SOURCES} shuffle-avx2.c bitshuffle-avx2.c)
-endif(COMPILER_SUPPORT_AVX2)
-set(SOURCES ${SOURCES} shuffle.c)
-
-# library install directory
-set(lib_dir lib${LIB_SUFFIX})
-set(version_string ${BLOSC_VERSION_MAJOR}.${BLOSC_VERSION_MINOR}.${BLOSC_VERSION_PATCH})
-
-set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
-if(WIN32)
-    # try to use the system library
-    find_package(Threads)
-    if(NOT Threads_FOUND)
-        message(STATUS "using the internal pthread library for win32 systems.")
-        set(SOURCES ${SOURCES} win32/pthread.c)
-    else(NOT Threads_FOUND)
-        set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})
-    endif(NOT Threads_FOUND)
-else(WIN32)
-    find_package(Threads REQUIRED)
-    set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})
-endif(WIN32)
-
-if(NOT DEACTIVATE_LZ4)
-    if(LZ4_FOUND)
-        set(LIBS ${LIBS} ${LZ4_LIBRARY})
-    else(LZ4_FOUND)
-        file(GLOB LZ4_FILES ${LZ4_LOCAL_DIR}/*.c)
-        set(SOURCES ${SOURCES} ${LZ4_FILES})
-    endif(LZ4_FOUND)
-endif(NOT DEACTIVATE_LZ4)
-
-if(NOT DEACTIVATE_SNAPPY)
-    if(SNAPPY_FOUND)
-        set(LIBS ${LIBS} ${SNAPPY_LIBRARY})
-    else(SNAPPY_FOUND)
-        file(GLOB SNAPPY_FILES ${SNAPPY_LOCAL_DIR}/*.cc)
-        set(SOURCES ${SOURCES} ${SNAPPY_FILES})
-    endif(SNAPPY_FOUND)
-endif(NOT DEACTIVATE_SNAPPY)
-
-if(NOT DEACTIVATE_ZLIB)
-    if(ZLIB_FOUND)
-        set(LIBS ${LIBS} ${ZLIB_LIBRARY})
-    else(ZLIB_FOUND)
-        file(GLOB ZLIB_FILES ${ZLIB_LOCAL_DIR}/*.c)
-        set(SOURCES ${SOURCES} ${ZLIB_FILES})
-    endif(ZLIB_FOUND)
-endif(NOT DEACTIVATE_ZLIB)
-
-if (NOT DEACTIVATE_ZSTD)
-    if (ZSTD_FOUND)
-        set(LIBS ${LIBS} ${ZSTD_LIBRARY})
-    else (ZSTD_FOUND)
-      file(GLOB ZSTD_FILES
-        ${ZSTD_LOCAL_DIR}/common/*.c
-        ${ZSTD_LOCAL_DIR}/compress/*.c
-        ${ZSTD_LOCAL_DIR}/decompress/*.c)
-        set(SOURCES ${SOURCES} ${ZSTD_FILES})
-    endif (ZSTD_FOUND)
-endif (NOT DEACTIVATE_ZSTD)
-
-
-# targets
-if (BUILD_SHARED)
-    add_library(blosc_shared SHARED ${SOURCES})
-    set_target_properties(blosc_shared PROPERTIES OUTPUT_NAME blosc)
-    set_target_properties(blosc_shared PROPERTIES
-            VERSION ${version_string}
-            SOVERSION 1  # Change this when an ABI change happens
-        )
-    set_property(
-        TARGET blosc_shared
-        APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_SHARED_LIBRARY)
-endif()
-
-# Based on the target architecture and hardware features supported
-# by the C compiler, set hardware architecture optimization flags
-# for specific shuffle implementations.
-if(COMPILER_SUPPORT_SSE2)
-    if (MSVC)
-        # MSVC targets SSE2 by default on 64-bit configurations, but not 32-bit configurations.
-        if (${CMAKE_SIZEOF_VOID_P} EQUAL 4)
-            set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS "/arch:SSE2")
-        endif (${CMAKE_SIZEOF_VOID_P} EQUAL 4)
-    else (MSVC)
-        set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS -msse2)
-    endif (MSVC)
-
-    # Define a symbol for the shuffle-dispatch implementation
-    # so it knows SSE2 is supported even though that file is
-    # compiled without SSE2 support (for portability).
-    set_property(
-        SOURCE shuffle.c
-        APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_SSE2_ENABLED)
-endif(COMPILER_SUPPORT_SSE2)
-if(COMPILER_SUPPORT_AVX2)
-    if (MSVC)
-        set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_FLAGS "/arch:AVX2")
-    else (MSVC)
-        set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_FLAGS -mavx2)
-    endif (MSVC)
-
-    # Define a symbol for the shuffle-dispatch implementation
-    # so it knows AVX2 is supported even though that file is
-    # compiled without AVX2 support (for portability).
-    set_property(
-        SOURCE shuffle.c
-        APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX2_ENABLED)
-endif(COMPILER_SUPPORT_AVX2)
-
-# When the option has been selected to compile the test suite,
-# compile an additional version of blosc_shared which exports
-# some normally-hidden symbols (to facilitate unit testing).
-if (BUILD_TESTS)
-    add_library(blosc_shared_testing SHARED ${SOURCES})
-    set_target_properties(blosc_shared_testing PROPERTIES OUTPUT_NAME blosc_testing)
-    set_property(
-        TARGET blosc_shared_testing
-        APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_SHARED_LIBRARY)
-    set_property(
-        TARGET blosc_shared_testing
-        APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_TESTING)
-    # TEMP : CMake doesn't automatically add -lpthread here like it does
-    # for the blosc_shared target. Force it for now.
-    if(UNIX)
-        set_property(
-            TARGET blosc_shared_testing
-            APPEND PROPERTY LINK_FLAGS "-lpthread")
-    endif()
-endif()
-
-if (BUILD_SHARED)
-    target_link_libraries(blosc_shared ${LIBS})
-    target_include_directories(blosc_shared PUBLIC ${BLOSC_INCLUDE_DIRS})
-endif()
-
-if (BUILD_TESTS)
-    target_link_libraries(blosc_shared_testing ${LIBS})
-    target_include_directories(blosc_shared_testing PUBLIC ${BLOSC_INCLUDE_DIRS})
-endif()
-
-if(BUILD_STATIC)
-    add_library(blosc_static STATIC ${SOURCES})
-    set_target_properties(blosc_static PROPERTIES OUTPUT_NAME blosc)
-    if (MSVC)
-        set_target_properties(blosc_static PROPERTIES PREFIX lib)
-    endif()
-    target_link_libraries(blosc_static ${LIBS})
-    target_include_directories(blosc_static PUBLIC ${BLOSC_INCLUDE_DIRS})
-endif(BUILD_STATIC)
-
-# install
-if(BLOSC_INSTALL)
-    install(FILES blosc.h blosc-export.h DESTINATION include COMPONENT DEV)
-    if(BUILD_SHARED)
-        install(TARGETS blosc_shared DESTINATION ${lib_dir} COMPONENT LIB)
-    endif(BUILD_SHARED)
-    if(BUILD_STATIC)
-        install(TARGETS blosc_static DESTINATION ${lib_dir} COMPONENT DEV)
-    endif(BUILD_STATIC)
-endif(BLOSC_INSTALL)
... ...
@@ -1,4 +1,4 @@
-EXTRA_FLAGS+=-I../lz4-1.8.3 -I../snappy-1.1.1 -I../zstd-1.3.8 @MSSE2@ @MAVX2@
+EXTRA_FLAGS+=-I../lz4-1.9.2 -I../snappy-1.1.1 -I../zstd-1.4.5 @MSSE2@ @MAVX2@
 FLAGS=$(PKG_CFLAGS) $(PKG_CPICFLAGS) $(PKG_CPPFLAGS) $(EXTRA_FLAGS)
 
 BLOSC_FILES := $(sort $(wildcard *.c))
similarity index 100%
rename from src/blosc/lib/lz4-1.8.3/Makefile
rename to src/blosc/lib/lz4-1.9.2/Makefile
similarity index 65%
rename from src/blosc/lib/lz4-1.8.3/lz4.c
rename to src/blosc/lib/lz4-1.9.2/lz4.c
... ...
@@ -32,7 +32,6 @@
     - LZ4 source repository : https://github.com/lz4/lz4
 */
 
-
 /*-************************************
 *  Tuning parameters
 **************************************/
... ...
@@ -91,8 +90,23 @@
 /*-************************************
 *  Dependency
 **************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#  define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
 #define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
 #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
 #include "lz4.h"
 /* see also "memory routines" below */
 
... ...
@@ -123,7 +137,7 @@
 #endif /* LZ4_FORCE_INLINE */
 
 /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
- * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  * together with a simple 8-byte copy loop as a fall-back path.
  * However, this optimization hurts the decompression speed by >30%,
  * because the execution does not go to the optimized loop
... ...
@@ -131,12 +145,12 @@
  * before going to the fall-back path become useless overhead.
  * This optimization happens only with the -O3 flag, and -O2 generates
  * a simple 8-byte copy loop.
- * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
  * functions are annotated with __attribute__((optimize("O2"))),
- * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
- * of LZ4_wildCopy does not affect the compression speed.
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
  */
-#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
 #  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
 #  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
 #else
... ...
@@ -170,7 +184,61 @@
 
 
 /*-************************************
-*  Basic Types
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
+
+
+/*-************************************
+*  Types
 **************************************/
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
... ...
@@ -195,6 +263,13 @@
   typedef size_t reg_t;   /* 32-bits in x32 mode */
 #endif
 
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
 /*-************************************
 *  Reading and writing into memory
 **************************************/
... ...
@@ -228,7 +303,7 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc
 static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
 
-#else  /* safe and portable access through memcpy() */
+#else  /* safe and portable access using memcpy() */
 
 static U16 LZ4_read16(const void* memPtr)
 {
... ...
@@ -281,7 +356,7 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
 
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
 LZ4_FORCE_O2_INLINE_GCC_PPC64LE
-void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
... ...
@@ -290,55 +365,95 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
     do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
+static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
 
-/*-************************************
-*  Common Constants
-**************************************/
-#define MINMATCH 4
-
-#define WILDCOPYLENGTH 8
-#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
-static const int LZ4_minLength = (MFLIMIT+1);
 
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+#ifndef LZ4_FAST_DEC_LOOP
+#  if defined(__i386__) || defined(__x86_64__)
+#    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && !defined(__clang__)
+     /* On aarch64, we disable this optimization for clang because on certain
+      * mobile chipsets and clang, it reduces performance. For more information
+      * refer to https://github.com/lz4/lz4/pull/707. */
+#    define LZ4_FAST_DEC_LOOP 1
+#  else
+#    define LZ4_FAST_DEC_LOOP 0
+#  endif
+#endif
 
-#define MAXD_LOG 16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#if LZ4_FAST_DEC_LOOP
 
-#define ML_BITS  4
-#define ML_MASK  ((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    if (offset < 8) {
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];
+        memcpy(dstPtr+4, srcPtr, 4);
+        srcPtr -= dec64table[offset];
+        dstPtr += 8;
+    } else {
+        memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }
 
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
+}
 
-/*-************************************
-*  Error detection
-**************************************/
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
-#  include <assert.h>
-#else
-#  ifndef assert
-#    define assert(condition) ((void)0)
-#  endif
-#endif
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
 
-#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+    do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+}
+
+/* LZ4_memcpy_using_offset()  presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    BYTE v[8];
+
+    assert(dstEnd >= dstPtr + MINMATCH);
+    LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
+
+    switch(offset) {
+    case 1:
+        memset(v, *srcPtr, 8);
+        break;
+    case 2:
+        memcpy(v, srcPtr, 2);
+        memcpy(&v[2], srcPtr, 2);
+        memcpy(&v[4], &v[0], 4);
+        break;
+    case 4:
+        memcpy(v, srcPtr, 4);
+        memcpy(&v[4], srcPtr, 4);
+        break;
+    default:
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
+    }
 
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
-#  include <stdio.h>
-static int g_debuglog_enable = 1;
-#  define DEBUGLOG(l, ...) {                                  \
-                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
-                    fprintf(stderr, __FILE__ ": ");           \
-                    fprintf(stderr, __VA_ARGS__);             \
-                    fprintf(stderr, " \n");                   \
-            }   }
-#else
-#  define DEBUGLOG(l, ...)      {}    /* disabled */
+    memcpy(dstPtr, v, 8);
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {
+        memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
 #endif
 
 
... ...
@@ -354,7 +469,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanForward64( &r, (U64)val );
             return (int)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctzll((U64)val) >> 3);
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
 #       else
             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                      0, 3, 1, 3, 1, 4, 2, 7,
... ...
@@ -372,7 +487,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanForward( &r, (U32)val );
             return (int)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctz((U32)val) >> 3);
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
 #       else
             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                      3, 2, 2, 1, 3, 2, 0, 1,
... ...
@@ -388,7 +503,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanReverse64( &r, val );
             return (unsigned)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clzll((U64)val) >> 3);
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
 #       else
             static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
                 Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
... ...
@@ -405,7 +520,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanReverse( &r, (unsigned long)val );
             return (unsigned)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clz((U32)val) >> 3);
+            return (unsigned)__builtin_clz((U32)val) >> 3;
 #       else
             unsigned r;
             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
... ...
@@ -455,7 +570,6 @@ static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression ru
 /*-************************************
 *  Local Structures and types
 **************************************/
-typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive;
 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
 
 /**
... ...
@@ -501,9 +615,11 @@ int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
 extern "C" {
 #endif
 
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
 
-int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize);
 
 #if defined (__cplusplus)
 }
... ...
@@ -522,13 +638,14 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 
 static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 {
-    static const U64 prime5bytes = 889523592379ULL;
-    static const U64 prime8bytes = 11400714785074694791ULL;
     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
-    if (LZ4_isLittleEndian())
+    if (LZ4_isLittleEndian()) {
+        const U64 prime5bytes = 889523592379ULL;
         return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
-    else
+    } else {
+        const U64 prime8bytes = 11400714785074694791ULL;
         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
+    }
 }
 
 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
... ...
@@ -537,6 +654,18 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
+static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: { /* illegal! */ assert(0); return; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+    }
+}
+
 static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
... ...
@@ -597,26 +726,37 @@ static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType
     { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
 }
 
-LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
-                                             const void* tableBase, tableType_t tableType,
-                                             const BYTE* srcBase)
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }
 
-LZ4_FORCE_INLINE void LZ4_prepareTable(
-        LZ4_stream_t_internal* const cctx,
-        const int inputSize,
-        const tableType_t tableType) {
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+           const int inputSize,
+           const tableType_t tableType) {
+    /* If compression failed during the previous step, then the context
+     * is marked as dirty, therefore, it has to be fully reset.
+     */
+    if (cctx->dirty) {
+        DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
+        MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
+        return;
+    }
+
     /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
      * therefore safe to use no matter what mode we're in. Otherwise, we figure
     * out if it's safe to leave as is or whether it needs to be reset.
     */
     if (cctx->tableType != clearedTable) {
+        assert(inputSize >= 0);
         if (cctx->tableType != tableType
-          || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
-          || (tableType == byU32 && cctx->currentOffset > 1 GB)
+          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
           || tableType == byPtr
          || inputSize >= 4 KB)
        {
... ...
@@ -629,7 +769,7 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
         }
     }
 
-    /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
+    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
      * than compressing without a gap. However, compressing with
     * currentOffset == 0 is faster still, so we preserve that case.
    */
... ...
@@ -651,14 +791,15 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 const char* const source,
                 char* const dest,
                 const int inputSize,
-                 int *inputConsumed, /* only written when outputLimited == fillOutput */
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                 const int maxOutputSize,
-                 const limitedOutput_directive outputLimited,
+                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
-                 const U32 acceleration)
+                 const int acceleration)
 {
+    int result;
     const BYTE* ip = (const BYTE*) source;
 
     U32 const startIndex = cctx->currentOffset;
... ...
@@ -693,10 +834,11 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     U32 forwardH;
 
     DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
-    /* Init conditions */
-    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }           /* Unsupported inputSize, too large (or negative) */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
     if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
     assert(acceleration >= 1);
 
... ...
@@ -724,12 +866,13 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     for ( ; ; ) {
         const BYTE* match;
         BYTE* token;
+        const BYTE* filledIp;
 
         /* Find a match */
         if (tableType == byPtr) {
             const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
                 ip = forwardIp;
... ...
@@ -743,14 +886,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
 
-            } while ( (match+MAX_DISTANCE < ip)
+            } while ( (match+LZ4_DISTANCE_MAX < ip)
                    || (LZ4_read32(match) != LZ4_read32(ip)) );
 
         } else {   /* byU32, byU16 */
 
             const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
                 U32 const current = (U32)(forwardIp - base);
... ...
@@ -792,10 +935,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
 
-                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
                 assert(matchIndex < current);
-                if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;  /* too far */
-                if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */
+                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
 
                 if (LZ4_read32(match) == LZ4_read32(ip)) {
                     if (maybe_extMem) offset = current - matchIndex;
... ...
@@ -806,21 +953,23 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         }
 
         /* Catch up */
+        filledIp = ip;
         while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
 
         /* Encode Literals */
         {   unsigned const litLength = (unsigned)(ip - anchor);
             token = op++;
-            if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
-                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
-                return 0;
-            if ((outputLimited == fillOutput) &&
+            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+            if ((outputDirective == fillOutput) &&
                 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                 op--;
                 goto _last_literals;
             }
             if (litLength >= RUN_MASK) {
-                int len = (int)litLength-RUN_MASK;
+                int len = (int)(litLength - RUN_MASK);
                 *token = (RUN_MASK<<ML_BITS);
                 for(; len >= 255 ; len-=255) *op++ = 255;
                 *op++ = (BYTE)len;
... ...
@@ -828,7 +977,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             else *token = (BYTE)(litLength<<ML_BITS);
 
             /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
+            LZ4_wildCopy8(op, anchor, op+litLength);
             op+=litLength;
             DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                         (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
... ...
@@ -843,7 +992,7 @@ _next_match:
          * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
          */
 
-        if ((outputLimited == fillOutput) &&
+        if ((outputDirective == fillOutput) &&
             (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
             /* the match was too close to the end, rewind and go to last literals */
             op = token;
... ...
@@ -853,11 +1002,11 @@ _next_match:
         /* Encode Offset */
         if (maybe_extMem) {   /* static test */
             DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
-            assert(offset <= MAX_DISTANCE && offset > 0);
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
             LZ4_writeLE16(op, (U16)offset); op+=2;
         } else  {
             DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
-            assert(ip-match <= MAX_DISTANCE);
+            assert(ip-match <= LZ4_DISTANCE_MAX);
             LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
         }
 
... ...
@@ -870,7 +1019,7 @@ _next_match:
                 assert(dictEnd > match);
                 if (limit > matchlimit) limit = matchlimit;
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 if (ip==limit) {
                     unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                     matchCode += more;
... ...
@@ -879,19 +1028,34 @@ _next_match:
                 DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
             } else {
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
             }
 
-            if ((outputLimited) &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
-                if (outputLimited == limitedOutput)
-                  return 0;
-                if (outputLimited == fillOutput) {
+            if ((outputDirective) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+                if (outputDirective == fillOutput) {
                     /* Match description too long : reduce it */
-                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                     ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
                     matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
+                } else {
+                    assert(outputDirective == limitedOutput);
+                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                 }
             }
             if (matchCode >= ML_MASK) {
... ...
@@ -908,6 +1072,8 @@ _next_match:
             } else
                 *token += (BYTE)(matchCode);
         }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
 
         anchor = ip;
 
... ...
@@ -922,7 +1088,7 @@ _next_match:
 
             match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
             LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-            if ( (match+MAX_DISTANCE >= ip)
+            if ( (match+LZ4_DISTANCE_MAX >= ip)
               && (LZ4_read32(match) == LZ4_read32(ip)) )
             { token=op++; *token=0; goto _next_match; }
 
... ...
@@ -957,7 +1123,7 @@ _next_match:
             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
             assert(matchIndex < current);
             if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
-              && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
+              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
               && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                 token=op++;
                 *token=0;
... ...
@@ -976,15 +1142,17 @@ _next_match:
 _last_literals:
     /* Encode Last Literals */
     {   size_t lastRun = (size_t)(iend - anchor);
-        if ( (outputLimited) &&  /* Check output buffer overflow */
+        if ( (outputDirective) &&  /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
-            if (outputLimited == fillOutput) {
+            if (outputDirective == fillOutput) {
                 /* adapt lastRun to fill 'dst' */
-                lastRun  = (olimit-op) - 1;
+                assert(olimit >= op);
+                lastRun  = (size_t)(olimit-op) - 1;
                 lastRun -= (lastRun+240)/255;
+            } else {
+                assert(outputDirective == limitedOutput);
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
             }
-            if (outputLimited == limitedOutput)
-                return 0;
         }
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
... ...
@@ -999,31 +1167,33 @@ _last_literals:
         op += lastRun;
     }
 
-    if (outputLimited == fillOutput) {
+    if (outputDirective == fillOutput) {
         *inputConsumed = (int) (((const char*)ip)-source);
     }
     DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
-    return (int)(((char*)op) - dest);
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    return result;
 }
 
 
 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
-    LZ4_resetStream((LZ4_stream_t*)state);
     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
         if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
         }
     } else {
-        if (inputSize < LZ4_64Klimit) {;
+        if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
         }
     }
... ...
@@ -1053,7 +1223,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
             }
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
         }
... ...
@@ -1067,7 +1237,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
             }
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
         }
... ...
@@ -1094,23 +1264,25 @@ int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutp
 }
 
 
-int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
 {
-    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
 }
 
 
 /* hidden debug function */
 /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
-int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
 {
     LZ4_stream_t ctx;
-    LZ4_resetStream(&ctx);
+    LZ4_initStream(&ctx, sizeof(ctx));
 
-    if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
-    else
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    if (srcSize < LZ4_64Klimit) {
+        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16,    noDict, noDictIssue, acceleration);
+    } else {
+        tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;
+        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
+    }
 }
 
 
... ...
@@ -1119,7 +1291,8 @@ int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int m
  * _continue() call without resetting it. */
 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-    LZ4_resetStream(state);
+    void* const s = LZ4_initStream(state, sizeof (*state));
+    assert(s != NULL); (void)s;
 
     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
... ...
@@ -1127,8 +1300,8 @@ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src,
         if (*srcSizePtr < LZ4_64Klimit) {
             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
         } else {
-            tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
-            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
+            tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
     }   }
 }
 
... ...
@@ -1159,14 +1332,40 @@ int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targe
 
 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
     DEBUGLOG(4, "LZ4_createStream %p", lz4s);
     if (lz4s == NULL) return NULL;
-    LZ4_resetStream(lz4s);
+    LZ4_initStream(lz4s, sizeof(*lz4s));
     return lz4s;
 }
 
+#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
+                     it reports an aligment of 8-bytes,
+                     while actually aligning LZ4_stream_t on 4 bytes. */
+static size_t LZ4_stream_t_alignment(void)
+{
+    struct { char c; LZ4_stream_t t; } t_a;
+    return sizeof(t_a) - sizeof(t_a.t);
+}
+#endif
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    if (buffer == NULL) { return NULL; }
+    if (size < sizeof(LZ4_stream_t)) { return NULL; }
+#ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
+                     it reports an aligment of 8-bytes,
+                     while actually aligning LZ4_stream_t on 4 bytes. */
+    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
+#endif
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
+    return (LZ4_stream_t*)buffer;
+}
+
+/* resetStream is now deprecated,
+ * prefer initStream() which is more general */
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
... ...
@@ -1209,46 +1408,64 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1209 1408
      * there are only valid offsets in the window, which allows an optimization
1210 1409
      * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
1211 1410
      * dictionary isn't a full 64k. */
1212
-
1213
-    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
1214
-    base = dictEnd - 64 KB - dict->currentOffset;
1215
-    dict->dictionary = p;
1216
-    dict->dictSize = (U32)(dictEnd - p);
1217 1411
     dict->currentOffset += 64 KB;
1218
-    dict->tableType = tableType;
1219 1412
 
1220 1413
     if (dictSize < (int)HASH_UNIT) {
1221 1414
         return 0;
1222 1415
     }
1223 1416
 
1417
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
1418
+    base = dictEnd - dict->currentOffset;
1419
+    dict->dictionary = p;
1420
+    dict->dictSize = (U32)(dictEnd - p);
1421
+    dict->tableType = tableType;
1422
+
1224 1423
     while (p <= dictEnd-HASH_UNIT) {
1225 1424
         LZ4_putPosition(p, dict->hashTable, tableType, base);
1226 1425
         p+=3;
1227 1426
     }
1228 1427
 
1229
-    return dict->dictSize;
1428
+    return (int)dict->dictSize;
1230 1429
 }
1231 1430
 
1232
-void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
1233
-    if (dictionary_stream != NULL) {
1431
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
1432
+    const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
1433
+        &(dictionaryStream->internal_donotuse);
1434
+
1435
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
1436
+             workingStream, dictionaryStream,
1437
+             dictCtx != NULL ? dictCtx->dictSize : 0);
1438
+
1439
+    /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
1440
+     * erased by subsequent calls to LZ4_resetStream_fast() in case the stream was
1441
+     * marked as having a dirty context, i.e. requiring a full reset.
1442
+     */
1443
+    LZ4_resetStream_fast(workingStream);
1444
+
1445
+    if (dictCtx != NULL) {
1234 1446
         /* If the current offset is zero, we will never look in the
1235 1447
          * external dictionary context, since there is no value a table
1236 1448
          * entry can take that indicate a miss. In that case, we need
1237 1449
          * to bump the offset to something non-zero.
1238 1450
          */
1239
-        if (working_stream->internal_donotuse.currentOffset == 0) {
1240
-            working_stream->internal_donotuse.currentOffset = 64 KB;
1451
+        if (workingStream->internal_donotuse.currentOffset == 0) {
1452
+            workingStream->internal_donotuse.currentOffset = 64 KB;
1453
+        }
1454
+
1455
+        /* Don't actually attach an empty dictionary.
1456
+         */
1457
+        if (dictCtx->dictSize == 0) {
1458
+            dictCtx = NULL;
1241 1459
         }
1242
-        working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
1243
-    } else {
1244
-        working_stream->internal_donotuse.dictCtx = NULL;
1245 1460
     }
1461
+    workingStream->internal_donotuse.dictCtx = dictCtx;
1246 1462
 }
1247 1463
 
1248 1464
 
1249 1465
 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
1250 1466
 {
1251
-    if (LZ4_dict->currentOffset + nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
1467
+    assert(nextSize >= 0);
1468
+    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
1252 1469
         /* rescale hash table */
1253 1470
         U32 const delta = LZ4_dict->currentOffset - 64 KB;
1254 1471
         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
... ...
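The reworked LZ4_attach_dictionary() pairs with LZ4_loadDict(): the dictionary is hashed once into its own stream, then attached by reference to each working stream. A sketch under the assumption that the experimental LZ4_STATIC_LINKING_ONLY API is enabled; dictBuf/dictLen and msg/msgLen are illustrative placeholders:

#define LZ4_STATIC_LINKING_ONLY   /* LZ4_attach_dictionary() is still experimental */
#include "lz4.h"

int compress_with_dict(const char* dictBuf, int dictLen,
                       const char* msg, int msgLen,
                       char* dst, int dstCapacity)
{
    LZ4_stream_t dictStream, workStream;

    LZ4_initStream(&dictStream, sizeof(dictStream));
    LZ4_loadDict(&dictStream, dictBuf, dictLen);      /* hash the dictionary once */

    LZ4_initStream(&workStream, sizeof(workStream));
    LZ4_attach_dictionary(&workStream, &dictStream);  /* attach by reference, no copy */

    return LZ4_compress_fast_continue(&workStream, msg, dst, msgLen, dstCapacity, 1);
}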
@@ -1265,7 +1482,10 @@ static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
1265 1482
 }
1266 1483
 
1267 1484
 
1268
-int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1485
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1486
+                                const char* source, char* dest,
1487
+                                int inputSize, int maxOutputSize,
1488
+                                int acceleration)
1269 1489
 {
1270 1490
     const tableType_t tableType = byU32;
1271 1491
     LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
... ...
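LZ4_compress_fast_continue() is the streaming entry point whose internals the next hunks touch; successive blocks compressed through one state may reference each other, so the previous block must stay addressable. A hedged sketch of the intended calling pattern; read_block() and send_block() are hypothetical I/O helpers:

#include "lz4.h"

extern int  read_block(char* buf, int cap);        /* hypothetical I/O helper */
extern void send_block(const char* buf, int len);  /* hypothetical I/O helper */

void stream_compress(void)
{
    static char inBuf[2][64 * 1024];   /* double buffer keeps the prior block addressable */
    static char dst[LZ4_COMPRESSBOUND(64 * 1024)];
    LZ4_stream_t state;
    int idx = 0, n;

    LZ4_initStream(&state, sizeof(state));
    while ((n = read_block(inBuf[idx], (int)sizeof(inBuf[idx]))) > 0) {
        int const c = LZ4_compress_fast_continue(&state, inBuf[idx], dst,
                                                 n, (int)sizeof(dst), 1);
        if (c <= 0) break;             /* 0 signals an error (e.g. a dirty state) */
        send_block(dst, c);
        idx ^= 1;                      /* previous block remains valid history */
    }
}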
@@ -1273,12 +1493,12 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch
1273 1493
 
1274 1494
     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
1275 1495
 
1276
-    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
1496
+    if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
1277 1497
     LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
1278 1498
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1279 1499
 
1280 1500
     /* invalidate tiny dictionaries */
1281
-    if ( (streamPtr->dictSize-1 < 4)   /* intentional underflow */
1501
+    if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
1282 1502
       && (dictEnd != (const BYTE*)source) ) {
1283 1503
         DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
1284 1504
         streamPtr->dictSize = 0;
... ...
@@ -1370,8 +1590,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1370 1590
     LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
1371 1591
     const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
1372 1592
 
1373
-    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
1374
-    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
1593
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1594
+    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1375 1595
 
1376 1596
     memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1377 1597
 
... ...
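LZ4_saveDict(), adjusted above, matters when the buffer holding recent history is about to be recycled: it moves up to 64 KB of history to a stable location so streaming can continue. A sketch with illustrative buffer names:

#include "lz4.h"

static char ring[256 * 1024];   /* input buffer about to be reused */
static char safe[64 * 1024];    /* stable home for the history window */

void recycle_ring(LZ4_stream_t* state)
{
    /* moves up to 64 KB of recent history out of 'ring';
     * returns the number of bytes actually saved */
    int const saved = LZ4_saveDict(state, safe, (int)sizeof(safe));
    (void)saved; (void)ring;
    /* 'ring' may now be overwritten; streaming continues from 'safe' */
}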
@@ -1393,6 +1613,37 @@ typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1393 1613
 #undef MIN
1394 1614
 #define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
1395 1615
 
1616
+/* Read the variable-length literal or match length.
1617
+ *
1618
+ * ip - pointer to use as input.
1619
+ * lencheck - limit pointer for ip.  Return an error if ip advances >= lencheck.
1620
+ * loop_check - check ip >= lencheck in body of loop.  Returns loop_error if so.
1621
+ * initial_check - check ip >= lencheck before start of loop.  Returns initial_error if so.
1622
+ * error (output) - error code.  Should be set to 0 before call.
1623
+ */
1624
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
1625
+LZ4_FORCE_INLINE unsigned
1626
+read_variable_length(const BYTE** ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
1627
+{
1628
+  unsigned length = 0;
1629
+  unsigned s;
1630
+  if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
1631
+    *error = initial_error;
1632
+    return length;
1633
+  }
1634
+  do {
1635
+    s = **ip;
1636
+    (*ip)++;
1637
+    length += s;
1638
+    if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
1639
+      *error = loop_error;
1640
+      return length;
1641
+    }
1642
+  } while (s==255);
1643
+
1644
+  return length;
1645
+}
1646
+
1396 1647
 /*! LZ4_decompress_generic() :
1397 1648
  *  This generic decompression function covers all use cases.
1398 1649
  *  It shall be instantiated several times, using different sets of directives.
... ...
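The byte stream consumed by the new read_variable_length() helper follows the LZ4 format's length-extension rule: a token nibble of 15 means extra bytes follow, each adding 0..255, with a value of 255 meaning "continue". A standalone illustration of the same decoding rule (not the internal function itself, which is static):

#include <stdio.h>

/* same rule as read_variable_length(), written standalone:
 * nibble saturates at 15, then each extra byte adds 0..255 and
 * a byte equal to 255 means another byte follows */
static unsigned decode_len(const unsigned char** ip, unsigned nibble)
{
    unsigned length = nibble;
    if (nibble == 15) {
        unsigned s;
        do { s = *(*ip)++; length += s; } while (s == 255);
    }
    return length;
}

int main(void)
{
    const unsigned char ext[] = { 255, 10 };   /* 15 + 255 + 10 = 280 */
    const unsigned char* p = ext;
    printf("%u\n", decode_len(&p, 15));        /* prints 280 */
    return 0;
}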
@@ -1414,234 +1665,406 @@ LZ4_decompress_generic(
1414 1665
                  const size_t dictSize         /* note : = 0 if noDict */
1415 1666
                  )
1416 1667
 {
1417
-    const BYTE* ip = (const BYTE*) src;
1418
-    const BYTE* const iend = ip + srcSize;
1668
+    if (src == NULL) { return -1; }
1419 1669
 
1420
-    BYTE* op = (BYTE*) dst;
1421
-    BYTE* const oend = op + outputSize;
1422
-    BYTE* cpy;
1670
+    {   const BYTE* ip = (const BYTE*) src;
1671
+        const BYTE* const iend = ip + srcSize;
1423 1672
 
1424
-    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
1425
-    const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
1426
-    const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
1673
+        BYTE* op = (BYTE*) dst;
1674
+        BYTE* const oend = op + outputSize;
1675
+        BYTE* cpy;
1427 1676
 
1428
-    const int safeDecode = (endOnInput==endOnInputSize);
1429
-    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
1677
+        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
1430 1678
 
1431
-    /* Set up the "end" pointers for the shortcut. */
1432
-    const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
1433
-    const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
1679
+        const int safeDecode = (endOnInput==endOnInputSize);
1680
+        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
1434 1681
 
1435
-    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
1436 1682
 
1437
-    /* Special cases */
1438
-    assert(lowPrefix <= op);
1439
-    assert(src != NULL);
1440
-    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
1441
-    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
1442
-    if ((endOnInput) && unlikely(srcSize==0)) return -1;
1683
+        /* Set up the "end" pointers for the shortcut. */
1684
+        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
1685
+        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
1443 1686
 
1444
-    /* Main Loop : decode sequences */
1445
-    while (1) {
1446 1687
         const BYTE* match;
1447 1688
         size_t offset;
1689
+        unsigned token;
1690
+        size_t length;
1448 1691
 
1449
-        unsigned const token = *ip++;
1450
-        size_t length = token >> ML_BITS;  /* literal length */
1451 1692
 
1452
-        assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
1693
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
1453 1694
 
1454
-        /* A two-stage shortcut for the most common case:
1455
-         * 1) If the literal length is 0..14, and there is enough space,
1456
-         * enter the shortcut and copy 16 bytes on behalf of the literals
1457
-         * (in the fast mode, only 8 bytes can be safely copied this way).
1458
-         * 2) Further if the match length is 4..18, copy 18 bytes in a similar
1459
-         * manner; but we ensure that there's enough space in the output for
1460
-         * those 18 bytes earlier, upon entering the shortcut (in other words,
1461
-         * there is a combined check for both stages).
1462
-         */
1463
-        if ( (endOnInput ? length != RUN_MASK : length <= 8)
1464
-            /* strictly "less than" on input, to re-enter the loop with at least one byte */
1465
-          && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
1466
-            /* Copy the literals */
1467
-            memcpy(op, ip, endOnInput ? 16 : 8);
1468
-            op += length; ip += length;
1469
-
1470
-            /* The second stage: prepare for match copying, decode full info.
1471
-             * If it doesn't work out, the info won't be wasted. */
1472
-            length = token & ML_MASK; /* match length */
1473
-            offset = LZ4_readLE16(ip); ip += 2;
1474
-            match = op - offset;
1475
-            assert(match <= op); /* check overflow */
1476
-
1477
-            /* Do not deal with overlapping matches. */
1478
-            if ( (length != ML_MASK)
1479
-              && (offset >= 8)
1480
-              && (dict==withPrefix64k || match >= lowPrefix) ) {
1481
-                /* Copy the match. */
1482
-                memcpy(op + 0, match + 0, 8);
1483
-                memcpy(op + 8, match + 8, 8);
1484
-                memcpy(op +16, match +16, 2);
1485
-                op += length + MINMATCH;
1486
-                /* Both stages worked, load the next token. */
1487
-                continue;
1488
-            }
1489
-
1490
-            /* The second stage didn't work out, but the info is ready.
1491
-             * Propel it right to the point of match copying. */
1492
-            goto _copy_match;
1695
+        /* Special cases */
1696
+        assert(lowPrefix <= op);
1697
+        if ((endOnInput) && (unlikely(outputSize==0))) {
1698
+            /* Empty output buffer */
1699
+            if (partialDecoding) return 0;
1700
+            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
1493 1701
         }
1494
-
1495
-        /* decode literal length */
1496
-        if (length == RUN_MASK) {
1497
-            unsigned s;
1498
-            if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error;   /* overflow detection */
1499
-            do {
1500
-                s = *ip++;
1501
-                length += s;
1502
-            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
1503
-            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
1504
-            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
1702
+        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
1703
+        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
1704
+
1705
+        /* Currently the fast loop shows a regression on Qualcomm ARM chips. */
1706
+#if LZ4_FAST_DEC_LOOP
1707
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
1708
+            DEBUGLOG(6, "skip fast decode loop");
1709
+            goto safe_decode;
1505 1710
         }
1506 1711
 
1507
-        /* copy literals */
1508
-        cpy = op+length;
1509
-        LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
1510
-        if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
1511
-          || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
1512
-        {
1513
-            if (partialDecoding) {
1514
-                if (cpy > oend) { cpy = oend; length = oend-op; }             /* Partial decoding : stop in the middle of literal segment */
1515
-                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
1712
+        /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
1713
+        while (1) {
1714
+            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
1715
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
1716
+            if (endOnInput) { assert(ip < iend); }
1717
+            token = *ip++;
1718
+            length = token >> ML_BITS;  /* literal length */
1719
+
1720
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
1721
+
1722
+            /* decode literal length */
1723
+            if (length == RUN_MASK) {
1724
+                variable_length_error error = ok;
1725
+                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1726
+                if (error == initial_error) { goto _output_error; }
1727
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1728
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
1729
+
1730
+                /* copy literals */
1731
+                cpy = op+length;
1732
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
1733
+                if (endOnInput) {  /* LZ4_decompress_safe() */
1734
+                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
1735
+                    LZ4_wildCopy32(op, ip, cpy);
1736
+                } else {   /* LZ4_decompress_fast() */
1737
+                    if (cpy>oend-8) { goto safe_literal_copy; }
1738
+                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
1739
+                                                 * it doesn't know input length, and only relies on end-of-block properties */
1740
+                }
1741
+                ip += length; op = cpy;
1516 1742
             } else {
1517
-                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
1518
-                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
1519
-            }
1520
-            memcpy(op, ip, length);
1521
-            ip += length;
1522
-            op += length;
1523
-            if (!partialDecoding || (cpy == oend)) {
1524
-                /* Necessarily EOF, due to parsing restrictions */
1525
-                break;
1743
+                cpy = op+length;
1744
+                if (endOnInput) {  /* LZ4_decompress_safe() */
1745
+                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
1746
+                    /* We don't need to check oend, since we check it once for each loop below */
1747
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
1748
+                    /* the literal length is at most 14 here, but copying 16 bytes lets compilers use register-sized moves */
1749
+                    memcpy(op, ip, 16);
1750
+                } else {  /* LZ4_decompress_fast() */
1751
+                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
1752
+                     * it doesn't know input length, and relies on end-of-block properties */
1753
+                    memcpy(op, ip, 8);
1754
+                    if (length > 8) { memcpy(op+8, ip+8, 8); }
1755
+                }
1756
+                ip += length; op = cpy;
1526 1757
             }
1527 1758
 
1528
-        } else {
1529
-            LZ4_wildCopy(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
1530
-            ip += length; op = cpy;
1531
-        }
1759
+            /* get offset */
1760
+            offset = LZ4_readLE16(ip); ip+=2;
1761
+            match = op - offset;
1762
+            assert(match <= op);
1763
+
1764
+            /* get matchlength */
1765
+            length = token & ML_MASK;
1766
+
1767
+            if (length == ML_MASK) {
1768
+                variable_length_error error = ok;
1769
+                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1770
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
1771
+                if (error != ok) { goto _output_error; }
1772
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
1773
+                length += MINMATCH;
1774
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
1775
+                    goto safe_match_copy;
1776
+                }
1777
+            } else {
1778
+                length += MINMATCH;
1779
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
1780
+                    goto safe_match_copy;
1781
+                }
1532 1782
 
1533
-        /* get offset */
1534
-        offset = LZ4_readLE16(ip); ip+=2;
1535
-        match = op - offset;
1783
+                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
1784
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
1785
+                    if (offset >= 8) {
1786
+                        assert(match >= lowPrefix);
1787
+                        assert(match <= op);
1788
+                        assert(op + 18 <= oend);
1789
+
1790
+                        memcpy(op, match, 8);
1791
+                        memcpy(op+8, match+8, 8);
1792
+                        memcpy(op+16, match+16, 2);
1793
+                        op += length;
1794
+                        continue;
1795
+            }   }   }
1796
+
1797
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1798
+            /* match starting within external dictionary */
1799
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
1800
+                if (unlikely(op+length > oend-LASTLITERALS)) {
1801
+                    if (partialDecoding) {
1802
+                        length = MIN(length, (size_t)(oend-op));  /* reach end of buffer */
1803
+                    } else {
1804
+                        goto _output_error;  /* end-of-block condition violated */
1805
+                }   }
1536 1806
 
1537
-        /* get matchlength */
1538
-        length = token & ML_MASK;
1807
+                if (length <= (size_t)(lowPrefix-match)) {
1808
+                    /* match fits entirely within external dictionary : just copy */
1809
+                    memmove(op, dictEnd - (lowPrefix-match), length);
1810
+                    op += length;
1811
+                } else {
1812
+                    /* match stretches into both external dictionary and current block */
1813
+                    size_t const copySize = (size_t)(lowPrefix - match);
1814
+                    size_t const restSize = length - copySize;
1815
+                    memcpy(op, dictEnd - copySize, copySize);
1816
+                    op += copySize;
1817
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
1818
+                        BYTE* const endOfMatch = op + restSize;
1819
+                        const BYTE* copyFrom = lowPrefix;
1820
+                        while (op < endOfMatch) { *op++ = *copyFrom++; }
1821
+                    } else {
1822
+                        memcpy(op, lowPrefix, restSize);
1823
+                        op += restSize;
1824
+                }   }
1825
+                continue;
1826
+            }
1539 1827
 
1540
-_copy_match:
1541
-        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
1542
-        if (!partialDecoding) {
1543
-            assert(oend > op);
1544
-            assert(oend - op >= 4);
1545
-            LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
1546
-        }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
1828
+            /* copy match within block */
1829
+            cpy = op + length;
1547 1830
 
1548
-        if (length == ML_MASK) {
1549
-            unsigned s;
1550
-            do {
1551
-                s = *ip++;
1552
-                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
1553
-                length += s;
1554
-            } while (s==255);
1555
-            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
1831
+            assert((op <= oend) && (oend-op >= 32));
1832
+            if (unlikely(offset<16)) {
1833
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
1834
+            } else {
1835
+                LZ4_wildCopy32(op, match, cpy);
1836
+            }
1837
+
1838
+            op = cpy;   /* wildcopy correction */
1556 1839
         }
1557
-        length += MINMATCH;
1840
+    safe_decode:
1841
+#endif
1842
+
1843
+        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
1844
+        while (1) {
1845
+            token = *ip++;
1846
+            length = token >> ML_BITS;  /* literal length */
1847
+
1848
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
1849
+
1850
+            /* A two-stage shortcut for the most common case:
1851
+             * 1) If the literal length is 0..14, and there is enough space,
1852
+             * enter the shortcut and copy 16 bytes on behalf of the literals
1853
+             * (in the fast mode, only 8 bytes can be safely copied this way).
1854
+             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
1855
+             * manner; but we ensure that there's enough space in the output for
1856
+             * those 18 bytes earlier, upon entering the shortcut (in other words,
1857
+             * there is a combined check for both stages).
1858
+             */
1859
+            if ( (endOnInput ? length != RUN_MASK : length <= 8)
1860
+                /* strictly "less than" on input, to re-enter the loop with at least one byte */
1861
+              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
1862
+                /* Copy the literals */
1863
+                memcpy(op, ip, endOnInput ? 16 : 8);
1864
+                op += length; ip += length;
1865
+
1866
+                /* The second stage: prepare for match copying, decode full info.
1867
+                 * If it doesn't work out, the info won't be wasted. */
1868
+                length = token & ML_MASK; /* match length */
1869
+                offset = LZ4_readLE16(ip); ip += 2;
1870
+                match = op - offset;
1871
+                assert(match <= op); /* check overflow */
1872
+
1873
+                /* Do not deal with overlapping matches. */
1874
+                if ( (length != ML_MASK)
1875
+                  && (offset >= 8)
1876
+                  && (dict==withPrefix64k || match >= lowPrefix) ) {
1877
+                    /* Copy the match. */
1878
+                    memcpy(op + 0, match + 0, 8);
1879
+                    memcpy(op + 8, match + 8, 8);
1880
+                    memcpy(op +16, match +16, 2);
1881
+                    op += length + MINMATCH;
1882
+                    /* Both stages worked, load the next token. */
1883
+                    continue;
1884
+                }
1885
+
1886
+                /* The second stage didn't work out, but the info is ready.
1887
+                 * Propel it right to the point of match copying. */
1888
+                goto _copy_match;
1889
+            }
1558 1890
 
1559
-        /* match starting within external dictionary */
1560
-        if ((dict==usingExtDict) && (match < lowPrefix)) {
1561
-            if (unlikely(op+length > oend-LASTLITERALS)) {
1562
-                if (partialDecoding) length = MIN(length, (size_t)(oend-op));
1563
-                else goto _output_error;   /* doesn't respect parsing restriction */
1891
+            /* decode literal length */
1892
+            if (length == RUN_MASK) {
1893
+                variable_length_error error = ok;
1894
+                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1895
+                if (error == initial_error) { goto _output_error; }
1896
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1897
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
1564 1898
             }
1565 1899
 
1566
-            if (length <= (size_t)(lowPrefix-match)) {
1567
-                /* match fits entirely within external dictionary : just copy */
1568
-                memmove(op, dictEnd - (lowPrefix-match), length);
1900
+            /* copy literals */
1901
+            cpy = op+length;
1902
+#if LZ4_FAST_DEC_LOOP
1903
+        safe_literal_copy:
1904
+#endif
1905
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
1906
+            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
1907
+              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
1908
+            {
1909
+                /* We've either hit the input parsing restriction or the output parsing restriction.
1910
+                 * If we've hit the input parsing condition then this must be the last sequence.
1911
+                 * If we've hit the output parsing condition then we are either using partialDecoding
1912
+                 * or this must be the last sequence, which the checks below verify.
1913
+                 */
1914
+                if (partialDecoding) {
1915
+                    /* Since we are partial decoding we may be in this block because of the output parsing
1916
+                     * restriction, which is not an error, since the output buffer is allowed to be undersized.
1917
+                     */
1918
+                    assert(endOnInput);
1919
+                    /* If we're in this block because of the input parsing condition, then we must be on the
1920
+                     * last sequence (or invalid), so we must check that we exactly consume the input.
1921
+                     */
1922
+                    if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
1923
+                    assert(ip+length <= iend);
1924
+                    /* We are finishing in the middle of a literals segment.
1925
+                     * Break after the copy.
1926
+                     */
1927
+                    if (cpy > oend) {
1928
+                        cpy = oend;
1929
+                        assert(op<=oend);
1930
+                        length = (size_t)(oend-op);
1931
+                    }
1932
+                    assert(ip+length <= iend);
1933
+                } else {
1934
+                    /* We must be on the last sequence because of the parsing limitations so check
1935
+                     * that we exactly regenerate the original size (must be exact when !endOnInput).
1936
+                     */
1937
+                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
1938
+                    /* We must be on the last sequence (or invalid) because of the parsing limitations
1939
+                     * so check that we exactly consume the input and don't overrun the output buffer.
1940
+                     */
1941
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
1942
+                }
1943
+                memmove(op, ip, length);  /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
1944
+                ip += length;
1569 1945
                 op += length;
1946
+                /* Necessarily EOF when !partialDecoding. When partialDecoding
1947
+                 * it is EOF if we've either filled the output buffer or hit
1948
+                 * the input parsing restriction.
1949
+                 */
1950
+                if (!partialDecoding || (cpy == oend) || (ip == iend)) {
1951
+                    break;
1952
+                }
1570 1953
             } else {
1571
-                /* match stretches into both external dictionary and current block */
1572
-                size_t const copySize = (size_t)(lowPrefix - match);
1573
-                size_t const restSize = length - copySize;
1574
-                memcpy(op, dictEnd - copySize, copySize);
1575
-                op += copySize;
1576
-                if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
1577
-                    BYTE* const endOfMatch = op + restSize;
1578
-                    const BYTE* copyFrom = lowPrefix;
1579
-                    while (op < endOfMatch) *op++ = *copyFrom++;
1954
+                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
1955
+                ip += length; op = cpy;
1956
+            }
1957
+
1958
+            /* get offset */
1959
+            offset = LZ4_readLE16(ip); ip+=2;
1960
+            match = op - offset;
1961
+
1962
+            /* get matchlength */
1963
+            length = token & ML_MASK;
1964
+
1965
+    _copy_match:
1966
+            if (length == ML_MASK) {
1967
+                variable_length_error error = ok;
1968
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
1969
+                if (error != ok) goto _output_error;
1970
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
1971
+            }
1972
+            length += MINMATCH;
1973
+
1974
+#if LZ4_FAST_DEC_LOOP
1975
+        safe_match_copy:
1976
+#endif
1977
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
1978
+            /* match starting within external dictionary */
1979
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
1980
+                if (unlikely(op+length > oend-LASTLITERALS)) {
1981
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
1982
+                    else goto _output_error;   /* doesn't respect parsing restriction */
1983
+                }
1984
+
1985
+                if (length <= (size_t)(lowPrefix-match)) {
1986
+                    /* match fits entirely within external dictionary : just copy */
1987
+                    memmove(op, dictEnd - (lowPrefix-match), length);
1988
+                    op += length;
1580 1989
                 } else {
1581
-                    memcpy(op, lowPrefix, restSize);
1582
-                    op += restSize;
1583
-            }   }
1584
-            continue;
1585
-        }
1990
+                    /* match stretches into both external dictionary and current block */
1991
+                    size_t const copySize = (size_t)(lowPrefix - match);
1992
+                    size_t const restSize = length - copySize;
1993
+                    memcpy(op, dictEnd - copySize, copySize);
1994
+                    op += copySize;
1995
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
1996
+                        BYTE* const endOfMatch = op + restSize;
1997
+                        const BYTE* copyFrom = lowPrefix;
1998
+                        while (op < endOfMatch) *op++ = *copyFrom++;
1999
+                    } else {
2000
+                        memcpy(op, lowPrefix, restSize);
2001
+                        op += restSize;
2002
+                }   }
2003
+                continue;
2004
+            }
2005
+            assert(match >= lowPrefix);
2006
+
2007
+            /* copy match within block */
2008
+            cpy = op + length;
2009
+
2010
+            /* partialDecoding : may end anywhere within the block */
2011
+            assert(op<=oend);
2012
+            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2013
+                size_t const mlen = MIN(length, (size_t)(oend-op));
2014
+                const BYTE* const matchEnd = match + mlen;
2015
+                BYTE* const copyEnd = op + mlen;
2016
+                if (matchEnd > op) {   /* overlap copy */
2017
+                    while (op < copyEnd) { *op++ = *match++; }
2018
+                } else {
2019
+                    memcpy(op, match, mlen);
2020
+                }
2021
+                op = copyEnd;
2022
+                if (op == oend) { break; }
2023
+                continue;
2024
+            }
1586 2025
 
1587
-        /* copy match within block */
1588
-        cpy = op + length;
1589
-
1590
-        /* partialDecoding : may not respect endBlock parsing restrictions */
1591
-        assert(op<=oend);
1592
-        if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
1593
-            size_t const mlen = MIN(length, (size_t)(oend-op));
1594
-            const BYTE* const matchEnd = match + mlen;
1595
-            BYTE* const copyEnd = op + mlen;
1596
-            if (matchEnd > op) {   /* overlap copy */
1597
-                while (op < copyEnd) *op++ = *match++;
2026
+            if (unlikely(offset<8)) {
2027
+                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
2028
+                op[0] = match[0];
2029
+                op[1] = match[1];
2030
+                op[2] = match[2];
2031
+                op[3] = match[3];
2032
+                match += inc32table[offset];
2033
+                memcpy(op+4, match, 4);
2034
+                match -= dec64table[offset];
1598 2035
             } else {
1599
-                memcpy(op, match, mlen);
2036
+                memcpy(op, match, 8);
2037
+                match += 8;
1600 2038
             }
1601
-            op = copyEnd;
1602
-            if (op==oend) break;
1603
-            continue;
1604
-        }
1605
-
1606
-        if (unlikely(offset<8)) {
1607
-            op[0] = match[0];
1608
-            op[1] = match[1];
1609
-            op[2] = match[2];
1610
-            op[3] = match[3];
1611
-            match += inc32table[offset];
1612
-            memcpy(op+4, match, 4);
1613
-            match -= dec64table[offset];
1614
-        } else {
1615
-            memcpy(op, match, 8);
1616
-            match += 8;
1617
-        }
1618
-        op += 8;
1619
-
1620
-        if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
1621
-            BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
1622
-            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
1623
-            if (op < oCopyLimit) {
1624
-                LZ4_wildCopy(op, match, oCopyLimit);
1625
-                match += oCopyLimit - op;
1626
-                op = oCopyLimit;
2039
+            op += 8;
2040
+
2041
+            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2042
+                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
2043
+                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
2044
+                if (op < oCopyLimit) {
2045
+                    LZ4_wildCopy8(op, match, oCopyLimit);
2046
+                    match += oCopyLimit - op;
2047
+                    op = oCopyLimit;
2048
+                }
2049
+                while (op < cpy) { *op++ = *match++; }
2050
+            } else {
2051
+                memcpy(op, match, 8);
2052
+                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
1627 2053
             }
1628
-            while (op < cpy) *op++ = *match++;
1629
-        } else {
1630
-            memcpy(op, match, 8);
1631
-            if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
2054
+            op = cpy;   /* wildcopy correction */
1632 2055
         }
1633
-        op = cpy;   /* wildcopy correction */
1634
-    }
1635 2056
 
1636
-    /* end of decoding */
1637
-    if (endOnInput)
1638
-       return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
1639
-    else
1640
-       return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
2057
+        /* end of decoding */
2058
+        if (endOnInput) {
2059
+           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
2060
+       } else {
2061
+           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
2062
+       }
1641 2063
 
1642
-    /* Overflow error detected */
1643
-_output_error:
1644
-    return (int) (-(((const char*)ip)-src))-1;
2064
+        /* Overflow error detected */
2065
+    _output_error:
2066
+        return (int) (-(((const char*)ip)-src))-1;
2067
+    }
1645 2068
 }
1646 2069
 
1647 2070
 
... ...
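The restructured LZ4_decompress_generic() above is instantiated into the public decoders, including the partial-decoding variant whose end-anywhere behaviour this hunk implements. A hedged sketch of both, assuming src holds a valid compressed block produced by an earlier compression step:

#include <stdio.h>
#include "lz4.h"

void decode_examples(const char* src, int compressedSize, int originalSize)
{
    static char full[64 * 1024];
    char head[100];

    /* full block: returns the number of bytes written, or a negative value on corruption */
    int const n = LZ4_decompress_safe(src, full, compressedSize, (int)sizeof(full));
    if (n < 0) { fprintf(stderr, "corrupt block\n"); return; }

    /* partial: stop once 100 bytes are produced (exercises the partialDecoding path) */
    int const p = LZ4_decompress_safe_partial(src, head, compressedSize,
                                              100, (int)sizeof(head));
    printf("full=%d partial=%d original=%d\n", n, p, originalSize);
}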
@@ -1745,12 +2168,13 @@ int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalS
1745 2168
 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
1746 2169
 {
1747 2170
     LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
2171
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
1748 2172
     return lz4s;
1749 2173
 }
1750 2174