• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004-2017 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5 
6 #include "gzguts.h"
7 #include <fcntl.h>
8 #include <unistd.h>
9 
10 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
11    state->fd, and update state->eof, state->err, and state->msg as appropriate.
12    This function needs to loop on read(), since read() is not guaranteed to
13    read the number of bytes requested, depending on the type of descriptor. */
gz_load(gz_statep state,unsigned char * buf,unsigned len,unsigned * have)14 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
15                   unsigned *have)
16 {
17     int ret;
18     unsigned get, max = ((unsigned)-1 >> 2) + 1;
19 
20     *have = 0;
21     do {
22         get = len - *have;
23         if (get > max)
24         {
25             get = max;
26         }
27         ret = read(state->fd, buf + *have, get);
28         if (ret <= 0)
29         {
30             break;
31         }
32         *have += (unsigned)ret;
33     } while (*have < len);
34     if (ret < 0) {
35         gz_error(state, Z_ERRNO, zstrerror());
36         return -1;
37     }
38     if (ret == 0)
39     {
40         state->eof = 1;
41     }
42     return 0;
43 }
44 
45 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
46    error, 0 otherwise.  Note that the eof flag is set when the end of the input
47    file is reached, even though there may be unused data in the buffer.  Once
48    that data has been used, no more attempts will be made to read the file.
49    If strm->avail_in != 0, then the current data is moved to the beginning of
50    the input buffer, and then the remainder of the buffer is loaded with the
51    available data from the input file. */
gz_avail(gz_statep state)52 local int gz_avail(gz_statep state)
53 {
54     unsigned got;
55     z_streamp strm = &(state->strm);
56 
57     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58     {
59         return -1;
60     }
61     if (state->eof == 0) {
62         if (strm->avail_in) {       /* copy what's there to the start */
63             unsigned char *p = state->in;
64             unsigned const char *q = strm->next_in;
65             unsigned n = strm->avail_in;
66             do {
67                 *p++ = *q++;
68             } while (--n);
69         }
70         if (gz_load(state, state->in + strm->avail_in,
71                     state->size - strm->avail_in, &got) == -1)
72         {
73             return -1;
74         }
75         strm->avail_in += got;
76         strm->next_in = state->in;
77     }
78     return 0;
79 }
80 
81 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
82    If this is the first time in, allocate required memory.  state->how will be
83    left unchanged if there is no more input data available, will be set to COPY
84    if there is no gzip header and direct copying will be performed, or it will
85    be set to GZIP for decompression.  If direct copying, then leftover input
86    data from the input buffer will be copied to the output buffer.  In that
87    case, all further file reads will be directly to either the output buffer or
88    a user buffer.  If decompressing, the inflate state will be initialized.
89    gz_look() will return 0 on success or -1 on failure. */
gz_look(gz_statep state)90 local int gz_look(gz_statep state)
91 {
92     z_streamp strm = &(state->strm);
93 
94     /* allocate read buffers and inflate memory */
95     if (state->size == 0) {
96         /* allocate buffers */
97         state->in = (unsigned char *)malloc(state->want);
98         state->out = (unsigned char *)malloc(state->want << 1);
99         if (state->in == NULL || state->out == NULL) {
100             free(state->out);
101             free(state->in);
102             gz_error(state, Z_MEM_ERROR, "out of memory");
103             return -1;
104         }
105         state->size = state->want;
106 
107         /* allocate inflate memory */
108         state->strm.zalloc = Z_NULL;
109         state->strm.zfree = Z_NULL;
110         state->strm.opaque = Z_NULL;
111         state->strm.avail_in = 0;
112         state->strm.next_in = Z_NULL;
113         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
114             free(state->out);
115             free(state->in);
116             state->size = 0;
117             gz_error(state, Z_MEM_ERROR, "out of memory");
118             return -1;
119         }
120     }
121 
122     /* get at least the magic bytes in the input buffer */
123     if (strm->avail_in < 2) {
124         if (gz_avail(state) == -1)
125         {
126             return -1;
127         }
128         if (strm->avail_in == 0)
129         {
130             return 0;
131         }
132     }
133 
134     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
135        a logical dilemma here when considering the case of a partially written
136        gzip file, to wit, if a single 31 byte is written, then we cannot tell
137        whether this is a single-byte file, or just a partially written gzip
138        file -- for here we assume that if a gzip file is being written, then
139        the header will be written in a single operation, so that reading a
140        single byte is sufficient indication that it is not a gzip file) */
141     if (strm->avail_in > 1 &&
142             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
143         inflateReset(strm);
144         state->how = GZIP;
145         state->direct = 0;
146         return 0;
147     }
148 
149     /* no gzip header -- if we were decoding gzip before, then this is trailing
150        garbage.  Ignore the trailing garbage and finish. */
151     if (state->direct == 0) {
152         strm->avail_in = 0;
153         state->eof = 1;
154         state->x.have = 0;
155         return 0;
156     }
157 
158     /* doing raw i/o, copy any leftover input to output -- this assumes that
159        the output buffer is larger than the input buffer, which also assures
160        space for gzungetc() */
161     state->x.next = state->out;
162     memcpy(state->x.next, strm->next_in, strm->avail_in);
163     state->x.have = strm->avail_in;
164     strm->avail_in = 0;
165     state->how = COPY;
166     state->direct = 1;
167     return 0;
168 }
169 
170 /* Decompress from input to the provided next_out and avail_out in the state.
171    On return, state->x.have and state->x.next point to the just decompressed
172    data.  If the gzip stream completes, state->how is reset to LOOK to look for
173    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
174    on success, -1 on failure. */
gz_decomp(gz_statep state)175 local int gz_decomp(gz_statep state)
176 {
177     int ret = Z_OK;
178     unsigned had;
179     z_streamp strm = &(state->strm);
180 
181     /* fill output buffer up to end of deflate stream */
182     had = strm->avail_out;
183     do {
184         /* get more input for inflate() */
185         if (strm->avail_in == 0 && gz_avail(state) == -1)
186         {
187             return -1;
188         }
189         if (strm->avail_in == 0) {
190             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
191             break;
192         }
193 
194         /* decompress and handle errors */
195         ret = inflate(strm, Z_NO_FLUSH);
196         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
197             gz_error(state, Z_STREAM_ERROR,
198                      "internal error: inflate stream corrupt");
199             return -1;
200         }
201         if (ret == Z_MEM_ERROR) {
202             gz_error(state, Z_MEM_ERROR, "out of memory");
203             return -1;
204         }
205         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
206             gz_error(state, Z_DATA_ERROR,
207                      strm->msg == NULL ? "compressed data error" : strm->msg);
208             return -1;
209         }
210     } while (strm->avail_out && ret != Z_STREAM_END);
211 
212     /* update available output */
213     state->x.have = had - strm->avail_out;
214     state->x.next = strm->next_out - state->x.have;
215 
216     /* if the gzip stream completed successfully, look for another */
217     if (ret == Z_STREAM_END)
218         state->how = LOOK;
219 
220     /* good decompression */
221     return 0;
222 }
223 
224 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
225    Data is either copied from the input file or decompressed from the input
226    file depending on state->how.  If state->how is LOOK, then a gzip header is
227    looked for to determine whether to copy or decompress.  Returns -1 on error,
228    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
229    end of the input file has been reached and all data has been processed.  */
gz_fetch(gz_statep state)230 local int gz_fetch(gz_statep state)
231 {
232     z_streamp strm = &(state->strm);
233 
234     do {
235         switch(state->how) {
236         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
237             if (gz_look(state) == -1)
238             {
239                 return -1;
240             }
241             if (state->how == LOOK)
242             {
243                 return 0;
244             }
245             break;
246         case COPY:      /* -> COPY */
247             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
248                     == -1)
249             {
250                 return -1;
251             }
252             state->x.next = state->out;
253             return 0;
254         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
255             strm->avail_out = state->size << 1;
256             strm->next_out = state->out;
257             if (gz_decomp(state) == -1)
258             {
259                 return -1;
260             }
261         }
262     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
263     return 0;
264 }
265 
266 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
gz_skip(gz_statep state,z_off64_t len)267 local int gz_skip(gz_statep state, z_off64_t len)
268 {
269     unsigned n;
270 
271     /* skip over len bytes or reach end-of-file, whichever comes first */
272     while (len)
273         /* skip over whatever is in output buffer */
274         if (state->x.have) {
275             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
276                 (unsigned)len : state->x.have;
277             state->x.have -= n;
278             state->x.next += n;
279             state->x.pos += n;
280             len -= n;
281         }
282 
283         /* output buffer empty -- return if we're at the end of the input */
284         else if (state->eof && state->strm.avail_in == 0)
285         {
286             break;
287         }
288 
289         /* need more data to skip -- load up output buffer */
290         else {
291             /* get more output, looking for header if required */
292             if (gz_fetch(state) == -1)
293             {
294                 return -1;
295             }
296         }
297     return 0;
298 }
299 
300 /* Read len bytes into buf from file, or less than len up to the end of the
301    input.  Return the number of bytes read.  If zero is returned, either the
302    end of file was reached, or there was an error.  state->err must be
303    consulted in that case to determine which. */
gz_read(gz_statep state,voidp buf,z_size_t len)304 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len)
305 {
306     z_size_t got;
307     unsigned n;
308 
309     /* if len is zero, avoid unnecessary operations */
310     if (len == 0)
311     {
312         return 0;
313     }
314 
315     /* process a skip request */
316     if (state->seek) {
317         state->seek = 0;
318         if (gz_skip(state, state->skip) == -1)
319         {
320             return 0;
321         }
322     }
323 
324     /* get len bytes to buf, or less than len if at the end */
325     got = 0;
326     do {
327         /* set n to the maximum amount of len that fits in an unsigned int */
328         n = (unsigned)-1;
329         if (n > len)
330         {
331             n = (unsigned)len;
332         }
333 
334         /* first just try copying data from the output buffer */
335         if (state->x.have) {
336             if (state->x.have < n)
337             {
338                 n = state->x.have;
339             }
340             memcpy(buf, state->x.next, n);
341             state->x.next += n;
342             state->x.have -= n;
343         }
344 
345         /* output buffer empty -- return if we're at the end of the input */
346         else if (state->eof && state->strm.avail_in == 0) {
347             state->past = 1;        /* tried to read past end */
348             break;
349         }
350 
351         /* need output data -- for small len or new stream load up our output
352            buffer */
353         else if (state->how == LOOK || n < (state->size << 1)) {
354             /* get more output, looking for header if required */
355             if (gz_fetch(state) == -1)
356             {
357                 return 0;
358             }
359             continue;       /* no progress yet -- go back to copy above */
360             /* the copy above assures that we will leave with space in the
361                output buffer, allowing at least one gzungetc() to succeed */
362         }
363 
364         /* large len -- read directly into user buffer */
365         else if (state->how == COPY) {      /* read directly */
366             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
367             {
368                 return 0;
369             }
370         }
371 
372         /* large len -- decompress directly into user buffer */
373         else {  /* state->how == GZIP */
374             state->strm.avail_out = n;
375             state->strm.next_out = (unsigned char *)buf;
376             if (gz_decomp(state) == -1)
377             {
378                 return 0;
379             }
380             n = state->x.have;
381             state->x.have = 0;
382         }
383 
384         /* update progress */
385         len -= n;
386         buf = (char *)buf + n;
387         got += n;
388         state->x.pos += n;
389     } while (len);
390 
391     /* return number of bytes read into user buffer */
392     return got;
393 }
394 
395 /* -- see zlib.h -- */
gzread(gzFile file,voidp buf,unsigned len)396 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len)
397 {
398     gz_statep state;
399 
400     /* get internal structure */
401     if (file == NULL)
402     {
403         return -1;
404     }
405     state = (gz_statep)file;
406 
407     /* check that we're reading and that there's no (serious) error */
408     if (state->mode != GZ_READ ||
409             (state->err != Z_OK && state->err != Z_BUF_ERROR))
410     {
411         return -1;
412     }
413 
414     /* since an int is returned, make sure len fits in one, otherwise return
415        with an error (this avoids a flaw in the interface) */
416     if ((int)len < 0) {
417         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
418         return -1;
419     }
420 
421     /* read len or fewer bytes to buf */
422     len = (unsigned)gz_read(state, buf, len);
423 
424     /* check for an error */
425     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
426     {
427         return -1;
428     }
429 
430     /* return the number of bytes read (this is assured to fit in an int) */
431     return (int)len;
432 }
433 
434 /* -- see zlib.h -- */
gzfread(voidp buf,z_size_t size,z_size_t nitems,gzFile file)435 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file)
436 {
437     z_size_t len;
438     gz_statep state;
439 
440     /* get internal structure */
441     if (file == NULL)
442     {
443         return 0;
444     }
445     state = (gz_statep)file;
446 
447     /* check that we're reading and that there's no (serious) error */
448     if (state->mode != GZ_READ ||
449             (state->err != Z_OK && state->err != Z_BUF_ERROR))
450     {
451         return 0;
452     }
453 
454     /* compute bytes to read -- error on overflow */
455     len = nitems * size;
456     if (size && len / size != nitems) {
457         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
458         return 0;
459     }
460 
461     /* read len or fewer bytes to buf, return the number of full items read */
462     return len ? gz_read(state, buf, len) / size : 0;
463 }
464 
465 /* -- see zlib.h -- */
466 #ifdef Z_PREFIX_SET
467 #  undef z_gzgetc
468 #else
469 #  undef gzgetc
470 #endif
gzgetc(gzFile file)471 int ZEXPORT gzgetc(gzFile file)
472 {
473     unsigned char buf[1];
474     gz_statep state;
475 
476     /* get internal structure */
477     if (file == NULL)
478     {
479         return -1;
480     }
481     state = (gz_statep)file;
482 
483     /* check that we're reading and that there's no (serious) error */
484     if (state->mode != GZ_READ ||
485         (state->err != Z_OK && state->err != Z_BUF_ERROR))
486     {
487         return -1;
488     }
489 
490     /* try output buffer (no need to check for skip request) */
491     if (state->x.have) {
492         state->x.have--;
493         state->x.pos++;
494         return *(state->x.next)++;
495     }
496 
497     /* nothing there -- try gz_read() */
498     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
499 }
500 
gzgetc_(gzFile file)501 int ZEXPORT gzgetc_(gzFile file)
502 {
503     return gzgetc(file);
504 }
505 
506 /* -- see zlib.h -- */
gzungetc(int c,gzFile file)507 int ZEXPORT gzungetc(int c, gzFile file)
508 {
509     gz_statep state;
510 
511     /* get internal structure */
512     if (file == NULL)
513     {
514         return -1;
515     }
516     state = (gz_statep)file;
517 
518     /* in case this was just opened, set up the input buffer */
519     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
520     {
521         (void)gz_look(state);
522     }
523 
524     /* check that we're reading and that there's no (serious) error */
525     if (state->mode != GZ_READ ||
526         (state->err != Z_OK && state->err != Z_BUF_ERROR))
527     {
528         return -1;
529     }
530 
531     /* process a skip request */
532     if (state->seek) {
533         state->seek = 0;
534         if (gz_skip(state, state->skip) == -1)
535         {
536             return -1;
537         }
538     }
539 
540     /* can't push EOF */
541     if (c < 0)
542     {
543         return -1;
544     }
545 
546     /* if output buffer empty, put byte at end (allows more pushing) */
547     if (state->x.have == 0) {
548         state->x.have = 1;
549         state->x.next = state->out + (state->size << 1) - 1;
550         state->x.next[0] = (unsigned char)c;
551         state->x.pos--;
552         state->past = 0;
553         return c;
554     }
555 
556     /* if no room, give up (must have already done a gzungetc()) */
557     if (state->x.have == (state->size << 1)) {
558         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
559         return -1;
560     }
561 
562     /* slide output data if needed and insert byte before existing data */
563     if (state->x.next == state->out) {
564         unsigned char *src = state->out + state->x.have;
565         unsigned char *dest = state->out + (state->size << 1);
566         while (src > state->out){
567             *--dest = *--src;
568         }
569         state->x.next = dest;
570     }
571     state->x.have++;
572     state->x.next--;
573     state->x.next[0] = (unsigned char)c;
574     state->x.pos--;
575     state->past = 0;
576     return c;
577 }
578 
579 /* -- see zlib.h -- */
gzgets(gzFile file,char * buf,int len)580 char * ZEXPORT gzgets(gzFile file, char *buf, int len)
581 {
582     unsigned left, n;
583     char *str;
584     unsigned char *eol;
585     gz_statep state;
586 
587     /* check parameters and get internal structure */
588     if (file == NULL || buf == NULL || len < 1)
589     {
590         return NULL;
591     }
592     state = (gz_statep)file;
593 
594     /* check that we're reading and that there's no (serious) error */
595     if (state->mode != GZ_READ ||
596         (state->err != Z_OK && state->err != Z_BUF_ERROR))
597     {
598         return NULL;
599     }
600 
601     /* process a skip request */
602     if (state->seek) {
603         state->seek = 0;
604         if (gz_skip(state, state->skip) == -1)
605         {
606             return NULL;
607         }
608     }
609 
610     /* copy output bytes up to new line or len - 1, whichever comes first --
611        append a terminating zero to the string (we don't check for a zero in
612        the contents, let the user worry about that) */
613     str = buf;
614     left = (unsigned)len - 1;
615     if (left) do {
616         /* assure that something is in the output buffer */
617         if (state->x.have == 0 && gz_fetch(state) == -1)
618         {
619             return NULL;                /* error */
620         }
621         if (state->x.have == 0) {       /* end of file */
622             state->past = 1;            /* read past end */
623             break;                      /* return what we have */
624         }
625 
626         /* look for end-of-line in current output buffer */
627         n = state->x.have > left ? left : state->x.have;
628         eol = (unsigned char *)memchr(state->x.next, '\n', n);
629         if (eol != NULL)
630         {
631             n = (unsigned)(eol - state->x.next) + 1;
632         }
633 
634         /* copy through end-of-line, or remainder if not found */
635         memcpy(buf, state->x.next, n);
636         state->x.have -= n;
637         state->x.next += n;
638         state->x.pos += n;
639         left -= n;
640         buf += n;
641     } while (left && eol == NULL);
642 
643     /* return terminated string, or if nothing, end of file */
644     if (buf == str)
645     {
646         return NULL;
647     }
648     buf[0] = 0;
649     return str;
650 }
651 
652 /* -- see zlib.h -- */
gzdirect(gzFile file)653 int ZEXPORT gzdirect(gzFile file)
654 {
655     gz_statep state;
656 
657     /* get internal structure */
658     if (file == NULL)
659     {
660         return 0;
661     }
662     state = (gz_statep)file;
663 
664     /* if the state is not known, but we can find out, then do so (this is
665        mainly for right after a gzopen() or gzdopen()) */
666     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
667     {
668         (void)gz_look(state);
669     }
670 
671     /* return 1 if transparent, 0 if processing a gzip stream */
672     return state->direct;
673 }
674 
675 /* -- see zlib.h -- */
gzclose_r(gzFile file)676 int ZEXPORT gzclose_r(gzFile file)
677 {
678     int ret, err;
679     gz_statep state;
680 
681     /* get internal structure */
682     if (file == NULL)
683     {
684         return Z_STREAM_ERROR;
685     }
686     state = (gz_statep)file;
687 
688     /* check that we're reading */
689     if (state->mode != GZ_READ)
690     {
691         return Z_STREAM_ERROR;
692     }
693 
694     /* free memory and close file */
695     if (state->size) {
696         inflateEnd(&(state->strm));
697         free(state->out);
698         free(state->in);
699     }
700     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
701     gz_error(state, Z_OK, NULL);
702     free(state->path);
703     ret = close(state->fd);
704     free(state);
705     return ret ? Z_ERRNO : err;
706 }
707