• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* inffast_chunk.c -- fast decoding
2  * Copyright (C) 1995-2017 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #include "zutil.h"
7 #include "inftrees.h"
8 #include "inflate.h"
9 #include "contrib/optimizations/inffast_chunk.h"
10 #include "contrib/optimizations/chunkcopy.h"
11 
12 #ifdef ASMINF
13 #  pragma message("Assembler code may have bugs -- use at your own risk")
14 #else
15 
16 /*
17    Decode literal, length, and distance codes and write out the resulting
18    literal and match bytes until either not enough input or output is
19    available, an end-of-block is encountered, or a data error is encountered.
20    When large enough input and output buffers are supplied to inflate(), for
21    example, a 16K input buffer and a 64K output buffer, more than 95% of the
22    inflate() execution time is spent in this routine.
23 
24    Entry assumptions:
25 
26         state->mode == LEN
27         strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes)
28         strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes)
29         start >= strm->avail_out
30         state->bits < 8
31         (state->hold >> state->bits) == 0
32         strm->next_out[0..strm->avail_out] does not overlap with
33               strm->next_in[0..strm->avail_in]
34         strm->state->window is allocated with an additional
35               CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
36 
37    On return, state->mode is one of:
38 
39         LEN -- ran out of enough output space or enough available input
40         TYPE -- reached end of block code, inflate() to interpret next block
41         BAD -- error in block data
42 
43    Notes:
44 
45     INFLATE_FAST_MIN_INPUT: 6 or 8 bytes
46 
47     - The maximum input bits used by a length/distance pair is 15 bits for the
48       length code, 5 bits for the length extra, 15 bits for the distance code,
49       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
50       Therefore if strm->avail_in >= 6, then there is enough input to avoid
51       checking for available input while decoding.
52 
53     - The wide input data reading option reads 64 input bits at a time. Thus,
54       if strm->avail_in >= 8, then there is enough input to avoid checking for
55       available input while decoding. Reading consumes the input with:
56 
57           hold |= read64le(in) << bits;
58           in += 6;
59           bits += 48;
60 
61       reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
62       up, worst case) and 6 bytes is enough to decode as noted above. At exit,
63       hold &= (1U << bits) - 1 drops excess input to keep the invariant:
64 
65           (state->hold >> state->bits) == 0
66 
67     INFLATE_FAST_MIN_OUTPUT: 258 bytes
68 
69     - The maximum bytes that a single length/distance pair can output is 258
70       bytes, which is the maximum length that can be coded.  inflate_fast()
71       requires strm->avail_out >= 258 for each loop to avoid checking for
72       available output space while decoding.
73  */
inflate_fast_chunk_(strm,start)74 void ZLIB_INTERNAL inflate_fast_chunk_(strm, start)
75 z_streamp strm;
76 unsigned start;         /* inflate()'s starting value for strm->avail_out */
77 {
78     struct inflate_state FAR *state;
79     z_const unsigned char FAR *in;      /* local strm->next_in */
80     z_const unsigned char FAR *last;    /* have enough input while in < last */
81     unsigned char FAR *out;     /* local strm->next_out */
82     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
83     unsigned char FAR *end;     /* while out < end, enough space available */
84     unsigned char FAR *limit;   /* safety limit for chunky copies */
85 #ifdef INFLATE_STRICT
86     unsigned dmax;              /* maximum distance from zlib header */
87 #endif
88     unsigned wsize;             /* window size or zero if not using window */
89     unsigned whave;             /* valid bytes in the window */
90     unsigned wnext;             /* window write index */
91     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
92     inflate_holder_t hold;      /* local strm->hold */
93     unsigned bits;              /* local strm->bits */
94     code const FAR *lcode;      /* local strm->lencode */
95     code const FAR *dcode;      /* local strm->distcode */
96     unsigned lmask;             /* mask for first level of length codes */
97     unsigned dmask;             /* mask for first level of distance codes */
98     code here;                  /* retrieved table entry */
99     unsigned op;                /* code bits, operation, extra bits, or */
100                                 /*  window position, window bytes to copy */
101     unsigned len;               /* match length, unused bytes */
102     unsigned dist;              /* match distance */
103     unsigned char FAR *from;    /* where to copy match from */
104 
105     /* copy state to local variables */
106     state = (struct inflate_state FAR *)strm->state;
107     in = strm->next_in;
108     last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1));
109     out = strm->next_out;
110     beg = out - (start - strm->avail_out);
111     end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1));
112     limit = out + strm->avail_out;
113 #ifdef INFLATE_STRICT
114     dmax = state->dmax;
115 #endif
116     wsize = state->wsize;
117     whave = state->whave;
118     wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
119     window = state->window;
120     hold = state->hold;
121     bits = state->bits;
122     lcode = state->lencode;
123     dcode = state->distcode;
124     lmask = (1U << state->lenbits) - 1;
125     dmask = (1U << state->distbits) - 1;
126 
127     /* decode literals and length/distances until end-of-block or not enough
128        input data or output space */
129     do {
130         if (bits < 15) {
131 #ifdef INFLATE_CHUNK_READ_64LE
132             hold |= read64le(in) << bits;
133             in += 6;
134             bits += 48;
135 #else
136             hold += (unsigned long)(*in++) << bits;
137             bits += 8;
138             hold += (unsigned long)(*in++) << bits;
139             bits += 8;
140 #endif
141         }
142         here = lcode[hold & lmask];
143       dolen:
144         op = (unsigned)(here.bits);
145         hold >>= op;
146         bits -= op;
147         op = (unsigned)(here.op);
148         if (op == 0) {                          /* literal */
149             Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
150                     "inflate:         literal '%c'\n" :
151                     "inflate:         literal 0x%02x\n", here.val));
152             *out++ = (unsigned char)(here.val);
153         }
154         else if (op & 16) {                     /* length base */
155             len = (unsigned)(here.val);
156             op &= 15;                           /* number of extra bits */
157             if (op) {
158                 if (bits < op) {
159 #ifdef INFLATE_CHUNK_READ_64LE
160                     hold |= read64le(in) << bits;
161                     in += 6;
162                     bits += 48;
163 #else
164                     hold += (unsigned long)(*in++) << bits;
165                     bits += 8;
166 #endif
167                 }
168                 len += (unsigned)hold & ((1U << op) - 1);
169                 hold >>= op;
170                 bits -= op;
171             }
172             Tracevv((stderr, "inflate:         length %u\n", len));
173             if (bits < 15) {
174 #ifdef INFLATE_CHUNK_READ_64LE
175                 hold |= read64le(in) << bits;
176                 in += 6;
177                 bits += 48;
178 #else
179                 hold += (unsigned long)(*in++) << bits;
180                 bits += 8;
181                 hold += (unsigned long)(*in++) << bits;
182                 bits += 8;
183 #endif
184             }
185             here = dcode[hold & dmask];
186           dodist:
187             op = (unsigned)(here.bits);
188             hold >>= op;
189             bits -= op;
190             op = (unsigned)(here.op);
191             if (op & 16) {                      /* distance base */
192                 dist = (unsigned)(here.val);
193                 op &= 15;                       /* number of extra bits */
194                 if (bits < op) {
195 #ifdef INFLATE_CHUNK_READ_64LE
196                     hold |= read64le(in) << bits;
197                     in += 6;
198                     bits += 48;
199 #else
200                     hold += (unsigned long)(*in++) << bits;
201                     bits += 8;
202                     if (bits < op) {
203                         hold += (unsigned long)(*in++) << bits;
204                         bits += 8;
205                     }
206 #endif
207                 }
208                 dist += (unsigned)hold & ((1U << op) - 1);
209 #ifdef INFLATE_STRICT
210                 if (dist > dmax) {
211                     strm->msg = (char *)"invalid distance too far back";
212                     state->mode = BAD;
213                     break;
214                 }
215 #endif
216                 hold >>= op;
217                 bits -= op;
218                 Tracevv((stderr, "inflate:         distance %u\n", dist));
219                 op = (unsigned)(out - beg);     /* max distance in output */
220                 if (dist > op) {                /* see if copy from window */
221                     op = dist - op;             /* distance back in window */
222                     if (op > whave) {
223                         if (state->sane) {
224                             strm->msg =
225                                 (char *)"invalid distance too far back";
226                             state->mode = BAD;
227                             break;
228                         }
229 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
230                         if (len <= op - whave) {
231                             do {
232                                 *out++ = 0;
233                             } while (--len);
234                             continue;
235                         }
236                         len -= op - whave;
237                         do {
238                             *out++ = 0;
239                         } while (--op > whave);
240                         if (op == 0) {
241                             from = out - dist;
242                             do {
243                                 *out++ = *from++;
244                             } while (--len);
245                             continue;
246                         }
247 #endif
248                     }
249                     from = window;
250                     if (wnext >= op) {          /* contiguous in window */
251                         from += wnext - op;
252                     }
253                     else {                      /* wrap around window */
254                         op -= wnext;
255                         from += wsize - op;
256                         if (op < len) {         /* some from end of window */
257                             len -= op;
258                             out = chunkcopy_safe(out, from, op, limit);
259                             from = window;      /* more from start of window */
260                             op = wnext;
261                             /* This (rare) case can create a situation where
262                                the first chunkcopy below must be checked.
263                              */
264                         }
265                     }
266                     if (op < len) {             /* still need some from output */
267                         out = chunkcopy_safe(out, from, op, limit);
268                         len -= op;
269                         /* When dist is small the amount of data that can be
270                            copied from the window is also small, and progress
271                            towards the dangerous end of the output buffer is
272                            also small.  This means that for trivial memsets and
273                            for chunkunroll_relaxed() a safety check is
274                            unnecessary.  However, these conditions may not be
275                            entered at all, and in that case it's possible that
276                            the main copy is near the end.
277                           */
278                         out = chunkunroll_relaxed(out, &dist, &len);
279                         out = chunkcopy_safe(out, out - dist, len, limit);
280                     } else {
281                         /* from points to window, so there is no risk of
282                            overlapping pointers requiring memset-like behaviour
283                          */
284                         out = chunkcopy_safe(out, from, len, limit);
285                     }
286                 }
287                 else {
288                     /* Whole reference is in range of current output.  No
289                        range checks are necessary because we start with room
290                        for at least 258 bytes of output, so unroll and roundoff
291                        operations can write beyond `out+len` so long as they
292                        stay within 258 bytes of `out`.
293                      */
294                     out = chunkcopy_lapped_relaxed(out, dist, len);
295                 }
296             }
297             else if ((op & 64) == 0) {          /* 2nd level distance code */
298                 here = dcode[here.val + (hold & ((1U << op) - 1))];
299                 goto dodist;
300             }
301             else {
302                 strm->msg = (char *)"invalid distance code";
303                 state->mode = BAD;
304                 break;
305             }
306         }
307         else if ((op & 64) == 0) {              /* 2nd level length code */
308             here = lcode[here.val + (hold & ((1U << op) - 1))];
309             goto dolen;
310         }
311         else if (op & 32) {                     /* end-of-block */
312             Tracevv((stderr, "inflate:         end of block\n"));
313             state->mode = TYPE;
314             break;
315         }
316         else {
317             strm->msg = (char *)"invalid literal/length code";
318             state->mode = BAD;
319             break;
320         }
321     } while (in < last && out < end);
322 
323     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
324     len = bits >> 3;
325     in -= len;
326     bits -= len << 3;
327     hold &= (1U << bits) - 1;
328 
329     /* update state and return */
330     strm->next_in = in;
331     strm->next_out = out;
332     strm->avail_in = (unsigned)(in < last ?
333         (INFLATE_FAST_MIN_INPUT - 1) + (last - in) :
334         (INFLATE_FAST_MIN_INPUT - 1) - (in - last));
335     strm->avail_out = (unsigned)(out < end ?
336         (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) :
337         (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end));
338     state->hold = hold;
339     state->bits = bits;
340 
341     Assert((state->hold >> state->bits) == 0, "invalid input data state");
342     return;
343 }
344 
345 /*
346    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
347    - Using bit fields for code structure
348    - Different op definition to avoid & for extra bits (do & for table bits)
349    - Three separate decoding do-loops for direct, window, and wnext == 0
350    - Special case for distance > 1 copies to do overlapped load and store copy
351    - Explicit branch predictions (based on measured branch probabilities)
352    - Deferring match copy and interspersed it with decoding subsequent codes
353    - Swapping literal/length else
354    - Swapping window/direct else
355    - Larger unrolled copy loops (three is about right)
356    - Moving len -= 3 statement into middle of loop
357  */
358 
359 #endif /* !ASMINF */
360