1 /* inffast_chunk.c -- fast decoding
2 * Copyright (C) 1995-2017 Mark Adler
3 * Copyright 2023 The Chromium Authors
4 * For conditions of distribution and use, see copyright notice in zlib.h
5 */
6
7 #include "zutil.h"
8 #include "inftrees.h"
9 #include "inflate.h"
10 #include "contrib/optimizations/inffast_chunk.h"
11 #include "contrib/optimizations/chunkcopy.h"
12
13 #ifdef ASMINF
14 # pragma message("Assembler code may have bugs -- use at your own risk")
15 #else
16
17 /*
18 Decode literal, length, and distance codes and write out the resulting
19 literal and match bytes until either not enough input or output is
20 available, an end-of-block is encountered, or a data error is encountered.
21 When large enough input and output buffers are supplied to inflate(), for
22 example, a 16K input buffer and a 64K output buffer, more than 95% of the
23 inflate() execution time is spent in this routine.
24
25 Entry assumptions:
26
27 state->mode == LEN
28 strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes)
29 strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes)
30 start >= strm->avail_out
31 state->bits < 8
32 (state->hold >> state->bits) == 0
33 strm->next_out[0..strm->avail_out] does not overlap with
34 strm->next_in[0..strm->avail_in]
35 strm->state->window is allocated with an additional
36 CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
37
38 On return, state->mode is one of:
39
40 LEN -- ran out of enough output space or enough available input
41 TYPE -- reached end of block code, inflate() to interpret next block
42 BAD -- error in block data
43
44 Notes:
45
46 INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes
47
48 - The maximum input bits used by a length/distance pair is 15 bits for the
49 length code, 5 bits for the length extra, 15 bits for the distance code,
50 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
51 Therefore if strm->avail_in >= 6, then there is enough input to avoid
52 checking for available input while decoding.
53
54 - The wide input data reading option reads 64 input bits at a time. Thus,
55 if strm->avail_in >= 8, then there is enough input to avoid checking for
56 available input while decoding. Reading consumes the input with:
57
58 hold |= read64le(in) << bits;
59 in += 6;
60 bits += 48;
61
62 reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
63 up, worst case) and 6 bytes is enough to decode as noted above. At exit,
64 hold &= (1U << bits) - 1 drops excess input to keep the invariant:
65
66 (state->hold >> state->bits) == 0
67
68 INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes
69
70 - The maximum bytes that a single length/distance pair can output is 258
71 bytes, which is the maximum length that can be coded. inflate_fast()
72 requires strm->avail_out >= 260 for each loop to avoid checking for
73 available output space while decoding.
74 */
inflate_fast_chunk_(z_streamp strm,unsigned start)75 void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) {
76 struct inflate_state FAR *state;
77 z_const unsigned char FAR *in; /* local strm->next_in */
78 z_const unsigned char FAR *last; /* have enough input while in < last */
79 unsigned char FAR *out; /* local strm->next_out */
80 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
81 unsigned char FAR *end; /* while out < end, enough space available */
82 unsigned char FAR *limit; /* safety limit for chunky copies */
83 #ifdef INFLATE_STRICT
84 unsigned dmax; /* maximum distance from zlib header */
85 #endif
86 unsigned wsize; /* window size or zero if not using window */
87 unsigned whave; /* valid bytes in the window */
88 unsigned wnext; /* window write index */
89 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
90 inflate_holder_t hold; /* local strm->hold */
91 unsigned bits; /* local strm->bits */
92 code const FAR *lcode; /* local strm->lencode */
93 code const FAR *dcode; /* local strm->distcode */
94 unsigned lmask; /* mask for first level of length codes */
95 unsigned dmask; /* mask for first level of distance codes */
96 code const *here; /* retrieved table entry */
97 unsigned op; /* code bits, operation, extra bits, or */
98 /* window position, window bytes to copy */
99 unsigned len; /* match length, unused bytes */
100 unsigned dist; /* match distance */
101 unsigned char FAR *from; /* where to copy match from */
102
103 /* copy state to local variables */
104 state = (struct inflate_state FAR *)strm->state;
105 in = strm->next_in;
106 last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1));
107 out = strm->next_out;
108 beg = out - (start - strm->avail_out);
109 end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1));
110 limit = out + strm->avail_out;
111 #ifdef INFLATE_STRICT
112 dmax = state->dmax;
113 #endif
114 wsize = state->wsize;
115 whave = state->whave;
116 wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
117 window = state->window;
118 hold = state->hold;
119 bits = state->bits;
120 lcode = state->lencode;
121 dcode = state->distcode;
122 lmask = (1U << state->lenbits) - 1;
123 dmask = (1U << state->distbits) - 1;
124
125 #ifdef INFLATE_CHUNK_READ_64LE
126 #define REFILL() do { \
127 Assert(bits < 64, "### Too many bits in inflate_fast."); \
128 hold |= read64le(in) << bits; \
129 in += 7; \
130 in -= bits >> 3; \
131 bits |= 56; \
132 } while (0)
133 #endif
134
135 /* decode literals and length/distances until end-of-block or not enough
136 input data or output space */
137 do {
138 #ifdef INFLATE_CHUNK_READ_64LE
139 REFILL();
140 #else
141 if (bits < 15) {
142 hold += (unsigned long)(*in++) << bits;
143 bits += 8;
144 hold += (unsigned long)(*in++) << bits;
145 bits += 8;
146 }
147 #endif
148 here = lcode + (hold & lmask);
149 #ifdef INFLATE_CHUNK_READ_64LE
150 if (here->op == 0) { /* literal */
151 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
152 "inflate: literal '%c'\n" :
153 "inflate: literal 0x%02x\n", here->val));
154 *out++ = (unsigned char)(here->val);
155 hold >>= here->bits;
156 bits -= here->bits;
157 here = lcode + (hold & lmask);
158 if (here->op == 0) { /* literal */
159 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
160 "inflate: 2nd literal '%c'\n" :
161 "inflate: 2nd literal 0x%02x\n", here->val));
162 *out++ = (unsigned char)(here->val);
163 hold >>= here->bits;
164 bits -= here->bits;
165 here = lcode + (hold & lmask);
166 }
167 }
168 #endif
169 dolen:
170 op = (unsigned)(here->bits);
171 hold >>= op;
172 bits -= op;
173 op = (unsigned)(here->op);
174 if (op == 0) { /* literal */
175 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
176 "inflate: literal '%c'\n" :
177 "inflate: literal 0x%02x\n", here->val));
178 *out++ = (unsigned char)(here->val);
179 }
180 else if (op & 16) { /* length base */
181 len = (unsigned)(here->val);
182 op &= 15; /* number of extra bits */
183 if (op) {
184 #ifndef INFLATE_CHUNK_READ_64LE
185 if (bits < op) {
186 hold += (unsigned long)(*in++) << bits;
187 bits += 8;
188 }
189 #endif
190 len += (unsigned)hold & ((1U << op) - 1);
191 hold >>= op;
192 bits -= op;
193 }
194 Tracevv((stderr, "inflate: length %u\n", len));
195 #ifndef INFLATE_CHUNK_READ_64LE
196 if (bits < 15) {
197 hold += (unsigned long)(*in++) << bits;
198 bits += 8;
199 hold += (unsigned long)(*in++) << bits;
200 bits += 8;
201 }
202 #endif
203 here = dcode + (hold & dmask);
204 dodist:
205 op = (unsigned)(here->bits);
206 hold >>= op;
207 bits -= op;
208 op = (unsigned)(here->op);
209 if (op & 16) { /* distance base */
210 dist = (unsigned)(here->val);
211 op &= 15; /* number of extra bits */
212 /* we have two fast-path loads: 10+10 + 15+5 + 15 = 55,
213 but we may need to refill here in the worst case */
214 if (bits < op) {
215 #ifdef INFLATE_CHUNK_READ_64LE
216 REFILL();
217 #else
218 hold += (unsigned long)(*in++) << bits;
219 bits += 8;
220 if (bits < op) {
221 hold += (unsigned long)(*in++) << bits;
222 bits += 8;
223 }
224 #endif
225 }
226 dist += (unsigned)hold & ((1U << op) - 1);
227 #ifdef INFLATE_STRICT
228 if (dist > dmax) {
229 strm->msg = (char *)"invalid distance too far back";
230 state->mode = BAD;
231 break;
232 }
233 #endif
234 hold >>= op;
235 bits -= op;
236 Tracevv((stderr, "inflate: distance %u\n", dist));
237 op = (unsigned)(out - beg); /* max distance in output */
238 if (dist > op) { /* see if copy from window */
239 op = dist - op; /* distance back in window */
240 if (op > whave) {
241 if (state->sane) {
242 strm->msg =
243 (char *)"invalid distance too far back";
244 state->mode = BAD;
245 break;
246 }
247 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
248 if (len <= op - whave) {
249 do {
250 *out++ = 0;
251 } while (--len);
252 continue;
253 }
254 len -= op - whave;
255 do {
256 *out++ = 0;
257 } while (--op > whave);
258 if (op == 0) {
259 from = out - dist;
260 do {
261 *out++ = *from++;
262 } while (--len);
263 continue;
264 }
265 #endif
266 }
267 from = window;
268 if (wnext >= op) { /* contiguous in window */
269 from += wnext - op;
270 }
271 else { /* wrap around window */
272 op -= wnext;
273 from += wsize - op;
274 if (op < len) { /* some from end of window */
275 len -= op;
276 out = chunkcopy_safe(out, from, op, limit);
277 from = window; /* more from start of window */
278 op = wnext;
279 /* This (rare) case can create a situation where
280 the first chunkcopy below must be checked.
281 */
282 }
283 }
284 if (op < len) { /* still need some from output */
285 out = chunkcopy_safe(out, from, op, limit);
286 len -= op;
287 /* When dist is small the amount of data that can be
288 copied from the window is also small, and progress
289 towards the dangerous end of the output buffer is
290 also small. This means that for trivial memsets and
291 for chunkunroll_relaxed() a safety check is
292 unnecessary. However, these conditions may not be
293 entered at all, and in that case it's possible that
294 the main copy is near the end.
295 */
296 out = chunkunroll_relaxed(out, &dist, &len);
297 out = chunkcopy_safe_ugly(out, dist, len, limit);
298 } else {
299 /* from points to window, so there is no risk of
300 overlapping pointers requiring memset-like behaviour
301 */
302 out = chunkcopy_safe(out, from, len, limit);
303 }
304 }
305 else {
306 /* Whole reference is in range of current output. No
307 range checks are necessary because we start with room
308 for at least 258 bytes of output, so unroll and roundoff
309 operations can write beyond `out+len` so long as they
310 stay within 258 bytes of `out`.
311 */
312 out = chunkcopy_lapped_relaxed(out, dist, len);
313 }
314 }
315 else if ((op & 64) == 0) { /* 2nd level distance code */
316 here = dcode + here->val + (hold & ((1U << op) - 1));
317 goto dodist;
318 }
319 else {
320 strm->msg = (char *)"invalid distance code";
321 state->mode = BAD;
322 break;
323 }
324 }
325 else if ((op & 64) == 0) { /* 2nd level length code */
326 here = lcode + here->val + (hold & ((1U << op) - 1));
327 goto dolen;
328 }
329 else if (op & 32) { /* end-of-block */
330 Tracevv((stderr, "inflate: end of block\n"));
331 state->mode = TYPE;
332 break;
333 }
334 else {
335 strm->msg = (char *)"invalid literal/length code";
336 state->mode = BAD;
337 break;
338 }
339 } while (in < last && out < end);
340
341 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
342 len = bits >> 3;
343 in -= len;
344 bits -= len << 3;
345 hold &= (1U << bits) - 1;
346
347 /* update state and return */
348 strm->next_in = in;
349 strm->next_out = out;
350 strm->avail_in = (unsigned)(in < last ?
351 (INFLATE_FAST_MIN_INPUT - 1) + (last - in) :
352 (INFLATE_FAST_MIN_INPUT - 1) - (in - last));
353 strm->avail_out = (unsigned)(out < end ?
354 (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) :
355 (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end));
356 state->hold = hold;
357 state->bits = bits;
358
359 Assert((state->hold >> state->bits) == 0, "invalid input data state");
360 }
361
362 /*
363 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
364 - Using bit fields for code structure
365 - Different op definition to avoid & for extra bits (do & for table bits)
366 - Three separate decoding do-loops for direct, window, and wnext == 0
367 - Special case for distance > 1 copies to do overlapped load and store copy
368 - Explicit branch predictions (based on measured branch probabilities)
369 - Deferring match copy and interspersed it with decoding subsequent codes
370 - Swapping literal/length else
371 - Swapping window/direct else
372 - Larger unrolled copy loops (three is about right)
373 - Moving len -= 3 statement into middle of loop
374 */
375
376 #endif /* !ASMINF */
377