• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* inffast_chunk.c -- fast decoding
2  * Copyright (C) 1995-2017 Mark Adler
3  * Copyright 2023 The Chromium Authors
4  * For conditions of distribution and use, see copyright notice in zlib.h
5  */
6 
7 #include "zutil.h"
8 #include "inftrees.h"
9 #include "inflate.h"
10 #include "contrib/optimizations/inffast_chunk.h"
11 #include "contrib/optimizations/chunkcopy.h"
12 
13 #ifdef ASMINF
14 #  pragma message("Assembler code may have bugs -- use at your own risk")
15 #else
16 
17 /*
18    Decode literal, length, and distance codes and write out the resulting
19    literal and match bytes until either not enough input or output is
20    available, an end-of-block is encountered, or a data error is encountered.
21    When large enough input and output buffers are supplied to inflate(), for
22    example, a 16K input buffer and a 64K output buffer, more than 95% of the
23    inflate() execution time is spent in this routine.
24 
25    Entry assumptions:
26 
27         state->mode == LEN
28         strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes)
29         strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes)
30         start >= strm->avail_out
31         state->bits < 8
32         (state->hold >> state->bits) == 0
33         strm->next_out[0..strm->avail_out] does not overlap with
34               strm->next_in[0..strm->avail_in]
35         strm->state->window is allocated with an additional
36               CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
37 
38    On return, state->mode is one of:
39 
40         LEN -- ran out of enough output space or enough available input
41         TYPE -- reached end of block code, inflate() to interpret next block
42         BAD -- error in block data
43 
44    Notes:
45 
46     INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes
47 
48     - The maximum input bits used by a length/distance pair is 15 bits for the
49       length code, 5 bits for the length extra, 15 bits for the distance code,
50       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
51       Therefore if strm->avail_in >= 6, then there is enough input to avoid
52       checking for available input while decoding.
53 
54     - The wide input data reading option reads 64 input bits at a time. Thus,
55       if strm->avail_in >= 8, then there is enough input to avoid checking for
56       available input while decoding. Reading consumes the input with:
57 
58           hold |= read64le(in) << bits;
59           in += 6;
60           bits += 48;
61 
62       reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
63       up, worst case) and 6 bytes is enough to decode as noted above. At exit,
64       hold &= (1U << bits) - 1 drops excess input to keep the invariant:
65 
66           (state->hold >> state->bits) == 0
67 
68     INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes
69 
70     - The maximum bytes that a single length/distance pair can output is 258
71       bytes, which is the maximum length that can be coded.  inflate_fast()
72       requires strm->avail_out >= 260 for each loop to avoid checking for
73       available output space while decoding.
74  */
inflate_fast_chunk_(z_streamp strm,unsigned start)75 void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) {
76     struct inflate_state FAR *state;
77     z_const unsigned char FAR *in;      /* local strm->next_in */
78     z_const unsigned char FAR *last;    /* have enough input while in < last */
79     unsigned char FAR *out;     /* local strm->next_out */
80     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
81     unsigned char FAR *end;     /* while out < end, enough space available */
82     unsigned char FAR *limit;   /* safety limit for chunky copies */
83 #ifdef INFLATE_STRICT
84     unsigned dmax;              /* maximum distance from zlib header */
85 #endif
86     unsigned wsize;             /* window size or zero if not using window */
87     unsigned whave;             /* valid bytes in the window */
88     unsigned wnext;             /* window write index */
89     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
90     inflate_holder_t hold;      /* local strm->hold */
91     unsigned bits;              /* local strm->bits */
92     code const FAR *lcode;      /* local strm->lencode */
93     code const FAR *dcode;      /* local strm->distcode */
94     unsigned lmask;             /* mask for first level of length codes */
95     unsigned dmask;             /* mask for first level of distance codes */
96     code const *here;           /* retrieved table entry */
97     unsigned op;                /* code bits, operation, extra bits, or */
98                                 /*  window position, window bytes to copy */
99     unsigned len;               /* match length, unused bytes */
100     unsigned dist;              /* match distance */
101     unsigned char FAR *from;    /* where to copy match from */
102 
103     /* copy state to local variables */
104     state = (struct inflate_state FAR *)strm->state;
105     in = strm->next_in;
106     last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1));
107     out = strm->next_out;
108     beg = out - (start - strm->avail_out);
109     end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1));
110     limit = out + strm->avail_out;
111 #ifdef INFLATE_STRICT
112     dmax = state->dmax;
113 #endif
114     wsize = state->wsize;
115     whave = state->whave;
116     wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
117     window = state->window;
118     hold = state->hold;
119     bits = state->bits;
120     lcode = state->lencode;
121     dcode = state->distcode;
122     lmask = (1U << state->lenbits) - 1;
123     dmask = (1U << state->distbits) - 1;
124 
125 #ifdef INFLATE_CHUNK_READ_64LE
126 #define REFILL() do { \
127         Assert(bits < 64, "### Too many bits in inflate_fast."); \
128         hold |= read64le(in) << bits; \
129         in += 7; \
130         in -= bits >> 3; \
131         bits |= 56; \
132     } while (0)
133 #endif
134 
135     /* decode literals and length/distances until end-of-block or not enough
136        input data or output space */
137     do {
138 #ifdef INFLATE_CHUNK_READ_64LE
139         REFILL();
140 #else
141         if (bits < 15) {
142             hold += (unsigned long)(*in++) << bits;
143             bits += 8;
144             hold += (unsigned long)(*in++) << bits;
145             bits += 8;
146         }
147 #endif
148         here = lcode + (hold & lmask);
149 #ifdef INFLATE_CHUNK_READ_64LE
150         if (here->op == 0) {                    /* literal */
151             Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
152                     "inflate:         literal '%c'\n" :
153                     "inflate:         literal 0x%02x\n", here->val));
154             *out++ = (unsigned char)(here->val);
155             hold >>= here->bits;
156             bits -= here->bits;
157             here = lcode + (hold & lmask);
158             if (here->op == 0) {                /* literal */
159                 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
160                         "inflate:    2nd  literal '%c'\n" :
161                         "inflate:    2nd  literal 0x%02x\n", here->val));
162                 *out++ = (unsigned char)(here->val);
163                 hold >>= here->bits;
164                 bits -= here->bits;
165                 here = lcode + (hold & lmask);
166             }
167         }
168 #endif
169       dolen:
170         op = (unsigned)(here->bits);
171         hold >>= op;
172         bits -= op;
173         op = (unsigned)(here->op);
174         if (op == 0) {                          /* literal */
175             Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
176                     "inflate:         literal '%c'\n" :
177                     "inflate:         literal 0x%02x\n", here->val));
178             *out++ = (unsigned char)(here->val);
179         }
180         else if (op & 16) {                     /* length base */
181             len = (unsigned)(here->val);
182             op &= 15;                           /* number of extra bits */
183             if (op) {
184 #ifndef INFLATE_CHUNK_READ_64LE
185                 if (bits < op) {
186                     hold += (unsigned long)(*in++) << bits;
187                     bits += 8;
188                 }
189 #endif
190                 len += (unsigned)hold & ((1U << op) - 1);
191                 hold >>= op;
192                 bits -= op;
193             }
194             Tracevv((stderr, "inflate:         length %u\n", len));
195 #ifndef INFLATE_CHUNK_READ_64LE
196             if (bits < 15) {
197                 hold += (unsigned long)(*in++) << bits;
198                 bits += 8;
199                 hold += (unsigned long)(*in++) << bits;
200                 bits += 8;
201             }
202 #endif
203             here = dcode + (hold & dmask);
204           dodist:
205             op = (unsigned)(here->bits);
206             hold >>= op;
207             bits -= op;
208             op = (unsigned)(here->op);
209             if (op & 16) {                      /* distance base */
210                 dist = (unsigned)(here->val);
211                 op &= 15;                       /* number of extra bits */
212                 /* we have two fast-path loads: 10+10 + 15+5 + 15 = 55,
213                    but we may need to refill here in the worst case */
214                 if (bits < op) {
215 #ifdef INFLATE_CHUNK_READ_64LE
216                     REFILL();
217 #else
218                     hold += (unsigned long)(*in++) << bits;
219                     bits += 8;
220                     if (bits < op) {
221                         hold += (unsigned long)(*in++) << bits;
222                         bits += 8;
223                     }
224 #endif
225                 }
226                 dist += (unsigned)hold & ((1U << op) - 1);
227 #ifdef INFLATE_STRICT
228                 if (dist > dmax) {
229                     strm->msg = (char *)"invalid distance too far back";
230                     state->mode = BAD;
231                     break;
232                 }
233 #endif
234                 hold >>= op;
235                 bits -= op;
236                 Tracevv((stderr, "inflate:         distance %u\n", dist));
237                 op = (unsigned)(out - beg);     /* max distance in output */
238                 if (dist > op) {                /* see if copy from window */
239                     op = dist - op;             /* distance back in window */
240                     if (op > whave) {
241                         if (state->sane) {
242                             strm->msg =
243                                 (char *)"invalid distance too far back";
244                             state->mode = BAD;
245                             break;
246                         }
247 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
248                         if (len <= op - whave) {
249                             do {
250                                 *out++ = 0;
251                             } while (--len);
252                             continue;
253                         }
254                         len -= op - whave;
255                         do {
256                             *out++ = 0;
257                         } while (--op > whave);
258                         if (op == 0) {
259                             from = out - dist;
260                             do {
261                                 *out++ = *from++;
262                             } while (--len);
263                             continue;
264                         }
265 #endif
266                     }
267                     from = window;
268                     if (wnext >= op) {          /* contiguous in window */
269                         from += wnext - op;
270                     }
271                     else {                      /* wrap around window */
272                         op -= wnext;
273                         from += wsize - op;
274                         if (op < len) {         /* some from end of window */
275                             len -= op;
276                             out = chunkcopy_safe(out, from, op, limit);
277                             from = window;      /* more from start of window */
278                             op = wnext;
279                             /* This (rare) case can create a situation where
280                                the first chunkcopy below must be checked.
281                              */
282                         }
283                     }
284                     if (op < len) {             /* still need some from output */
285                         out = chunkcopy_safe(out, from, op, limit);
286                         len -= op;
287                         /* When dist is small the amount of data that can be
288                            copied from the window is also small, and progress
289                            towards the dangerous end of the output buffer is
290                            also small.  This means that for trivial memsets and
291                            for chunkunroll_relaxed() a safety check is
292                            unnecessary.  However, these conditions may not be
293                            entered at all, and in that case it's possible that
294                            the main copy is near the end.
295                           */
296                         out = chunkunroll_relaxed(out, &dist, &len);
297                         out = chunkcopy_safe_ugly(out, dist, len, limit);
298                     } else {
299                         /* from points to window, so there is no risk of
300                            overlapping pointers requiring memset-like behaviour
301                          */
302                         out = chunkcopy_safe(out, from, len, limit);
303                     }
304                 }
305                 else {
306                     /* Whole reference is in range of current output.  No
307                        range checks are necessary because we start with room
308                        for at least 258 bytes of output, so unroll and roundoff
309                        operations can write beyond `out+len` so long as they
310                        stay within 258 bytes of `out`.
311                      */
312                     out = chunkcopy_lapped_relaxed(out, dist, len);
313                 }
314             }
315             else if ((op & 64) == 0) {          /* 2nd level distance code */
316                 here = dcode + here->val + (hold & ((1U << op) - 1));
317                 goto dodist;
318             }
319             else {
320                 strm->msg = (char *)"invalid distance code";
321                 state->mode = BAD;
322                 break;
323             }
324         }
325         else if ((op & 64) == 0) {              /* 2nd level length code */
326             here = lcode + here->val + (hold & ((1U << op) - 1));
327             goto dolen;
328         }
329         else if (op & 32) {                     /* end-of-block */
330             Tracevv((stderr, "inflate:         end of block\n"));
331             state->mode = TYPE;
332             break;
333         }
334         else {
335             strm->msg = (char *)"invalid literal/length code";
336             state->mode = BAD;
337             break;
338         }
339     } while (in < last && out < end);
340 
341     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
342     len = bits >> 3;
343     in -= len;
344     bits -= len << 3;
345     hold &= (1U << bits) - 1;
346 
347     /* update state and return */
348     strm->next_in = in;
349     strm->next_out = out;
350     strm->avail_in = (unsigned)(in < last ?
351         (INFLATE_FAST_MIN_INPUT - 1) + (last - in) :
352         (INFLATE_FAST_MIN_INPUT - 1) - (in - last));
353     strm->avail_out = (unsigned)(out < end ?
354         (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) :
355         (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end));
356     state->hold = hold;
357     state->bits = bits;
358 
359     Assert((state->hold >> state->bits) == 0, "invalid input data state");
360 }
361 
362 /*
363    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
364    - Using bit fields for code structure
365    - Different op definition to avoid & for extra bits (do & for table bits)
366    - Three separate decoding do-loops for direct, window, and wnext == 0
367    - Special case for distance > 1 copies to do overlapped load and store copy
368    - Explicit branch predictions (based on measured branch probabilities)
369    - Deferring match copy and interspersed it with decoding subsequent codes
370    - Swapping literal/length else
371    - Swapping window/direct else
372    - Larger unrolled copy loops (three is about right)
373    - Moving len -= 3 statement into middle of loop
374  */
375 
376 #endif /* !ASMINF */
377