1 /* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004-2017 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #include "gzguts.h"
7 #include <fcntl.h>
8 #include <unistd.h>
9
10 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
11 state->fd, and update state->eof, state->err, and state->msg as appropriate.
12 This function needs to loop on read(), since read() is not guaranteed to
13 read the number of bytes requested, depending on the type of descriptor. */
gz_load(gz_statep state,unsigned char * buf,unsigned len,unsigned * have)14 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
15 unsigned *have)
16 {
17 int ret;
18 unsigned get, max = ((unsigned)-1 >> 2) + 1;
19
20 *have = 0;
21 do {
22 get = len - *have;
23 if (get > max)
24 {
25 get = max;
26 }
27 ret = read(state->fd, buf + *have, get);
28 if (ret <= 0)
29 {
30 break;
31 }
32 *have += (unsigned)ret;
33 } while (*have < len);
34 if (ret < 0) {
35 gz_error(state, Z_ERRNO, zstrerror());
36 return -1;
37 }
38 if (ret == 0)
39 {
40 state->eof = 1;
41 }
42 return 0;
43 }
44
45 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
46 error, 0 otherwise. Note that the eof flag is set when the end of the input
47 file is reached, even though there may be unused data in the buffer. Once
48 that data has been used, no more attempts will be made to read the file.
49 If strm->avail_in != 0, then the current data is moved to the beginning of
50 the input buffer, and then the remainder of the buffer is loaded with the
51 available data from the input file. */
gz_avail(gz_statep state)52 local int gz_avail(gz_statep state)
53 {
54 unsigned got;
55 z_streamp strm = &(state->strm);
56
57 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58 {
59 return -1;
60 }
61 if (state->eof == 0) {
62 if (strm->avail_in) { /* copy what's there to the start */
63 unsigned char *p = state->in;
64 unsigned const char *q = strm->next_in;
65 unsigned n = strm->avail_in;
66 do {
67 *p++ = *q++;
68 } while (--n);
69 }
70 if (gz_load(state, state->in + strm->avail_in,
71 state->size - strm->avail_in, &got) == -1)
72 {
73 return -1;
74 }
75 strm->avail_in += got;
76 strm->next_in = state->in;
77 }
78 return 0;
79 }
80
81 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
82 If this is the first time in, allocate required memory. state->how will be
83 left unchanged if there is no more input data available, will be set to COPY
84 if there is no gzip header and direct copying will be performed, or it will
85 be set to GZIP for decompression. If direct copying, then leftover input
86 data from the input buffer will be copied to the output buffer. In that
87 case, all further file reads will be directly to either the output buffer or
88 a user buffer. If decompressing, the inflate state will be initialized.
89 gz_look() will return 0 on success or -1 on failure. */
gz_look(gz_statep state)90 local int gz_look(gz_statep state)
91 {
92 z_streamp strm = &(state->strm);
93
94 /* allocate read buffers and inflate memory */
95 if (state->size == 0) {
96 /* allocate buffers */
97 state->in = (unsigned char *)malloc(state->want);
98 state->out = (unsigned char *)malloc(state->want << 1);
99 if (state->in == NULL || state->out == NULL) {
100 free(state->out);
101 free(state->in);
102 gz_error(state, Z_MEM_ERROR, "out of memory");
103 return -1;
104 }
105 state->size = state->want;
106
107 /* allocate inflate memory */
108 state->strm.zalloc = Z_NULL;
109 state->strm.zfree = Z_NULL;
110 state->strm.opaque = Z_NULL;
111 state->strm.avail_in = 0;
112 state->strm.next_in = Z_NULL;
113 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
114 free(state->out);
115 free(state->in);
116 state->size = 0;
117 gz_error(state, Z_MEM_ERROR, "out of memory");
118 return -1;
119 }
120 }
121
122 /* get at least the magic bytes in the input buffer */
123 if (strm->avail_in < 2) {
124 if (gz_avail(state) == -1)
125 {
126 return -1;
127 }
128 if (strm->avail_in == 0)
129 {
130 return 0;
131 }
132 }
133
134 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
135 a logical dilemma here when considering the case of a partially written
136 gzip file, to wit, if a single 31 byte is written, then we cannot tell
137 whether this is a single-byte file, or just a partially written gzip
138 file -- for here we assume that if a gzip file is being written, then
139 the header will be written in a single operation, so that reading a
140 single byte is sufficient indication that it is not a gzip file) */
141 if (strm->avail_in > 1 &&
142 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
143 inflateReset(strm);
144 state->how = GZIP;
145 state->direct = 0;
146 return 0;
147 }
148
149 /* no gzip header -- if we were decoding gzip before, then this is trailing
150 garbage. Ignore the trailing garbage and finish. */
151 if (state->direct == 0) {
152 strm->avail_in = 0;
153 state->eof = 1;
154 state->x.have = 0;
155 return 0;
156 }
157
158 /* doing raw i/o, copy any leftover input to output -- this assumes that
159 the output buffer is larger than the input buffer, which also assures
160 space for gzungetc() */
161 state->x.next = state->out;
162 memcpy(state->x.next, strm->next_in, strm->avail_in);
163 state->x.have = strm->avail_in;
164 strm->avail_in = 0;
165 state->how = COPY;
166 state->direct = 1;
167 return 0;
168 }
169
170 /* Decompress from input to the provided next_out and avail_out in the state.
171 On return, state->x.have and state->x.next point to the just decompressed
172 data. If the gzip stream completes, state->how is reset to LOOK to look for
173 the next gzip stream or raw data, once state->x.have is depleted. Returns 0
174 on success, -1 on failure. */
gz_decomp(gz_statep state)175 local int gz_decomp(gz_statep state)
176 {
177 int ret = Z_OK;
178 unsigned had;
179 z_streamp strm = &(state->strm);
180
181 /* fill output buffer up to end of deflate stream */
182 had = strm->avail_out;
183 do {
184 /* get more input for inflate() */
185 if (strm->avail_in == 0 && gz_avail(state) == -1)
186 {
187 return -1;
188 }
189 if (strm->avail_in == 0) {
190 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
191 break;
192 }
193
194 /* decompress and handle errors */
195 ret = inflate(strm, Z_NO_FLUSH);
196 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
197 gz_error(state, Z_STREAM_ERROR,
198 "internal error: inflate stream corrupt");
199 return -1;
200 }
201 if (ret == Z_MEM_ERROR) {
202 gz_error(state, Z_MEM_ERROR, "out of memory");
203 return -1;
204 }
205 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
206 gz_error(state, Z_DATA_ERROR,
207 strm->msg == NULL ? "compressed data error" : strm->msg);
208 return -1;
209 }
210 } while (strm->avail_out && ret != Z_STREAM_END);
211
212 /* update available output */
213 state->x.have = had - strm->avail_out;
214 state->x.next = strm->next_out - state->x.have;
215
216 /* if the gzip stream completed successfully, look for another */
217 if (ret == Z_STREAM_END)
218 state->how = LOOK;
219
220 /* good decompression */
221 return 0;
222 }
223
224 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
225 Data is either copied from the input file or decompressed from the input
226 file depending on state->how. If state->how is LOOK, then a gzip header is
227 looked for to determine whether to copy or decompress. Returns -1 on error,
228 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
229 end of the input file has been reached and all data has been processed. */
gz_fetch(gz_statep state)230 local int gz_fetch(gz_statep state)
231 {
232 z_streamp strm = &(state->strm);
233
234 do {
235 switch(state->how) {
236 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
237 if (gz_look(state) == -1)
238 {
239 return -1;
240 }
241 if (state->how == LOOK)
242 {
243 return 0;
244 }
245 break;
246 case COPY: /* -> COPY */
247 if (gz_load(state, state->out, state->size << 1, &(state->x.have))
248 == -1)
249 {
250 return -1;
251 }
252 state->x.next = state->out;
253 return 0;
254 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
255 strm->avail_out = state->size << 1;
256 strm->next_out = state->out;
257 if (gz_decomp(state) == -1)
258 {
259 return -1;
260 }
261 }
262 } while (state->x.have == 0 && (!state->eof || strm->avail_in));
263 return 0;
264 }
265
266 /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
gz_skip(gz_statep state,z_off64_t len)267 local int gz_skip(gz_statep state, z_off64_t len)
268 {
269 unsigned n;
270
271 /* skip over len bytes or reach end-of-file, whichever comes first */
272 while (len)
273 /* skip over whatever is in output buffer */
274 if (state->x.have) {
275 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
276 (unsigned)len : state->x.have;
277 state->x.have -= n;
278 state->x.next += n;
279 state->x.pos += n;
280 len -= n;
281 }
282
283 /* output buffer empty -- return if we're at the end of the input */
284 else if (state->eof && state->strm.avail_in == 0)
285 {
286 break;
287 }
288
289 /* need more data to skip -- load up output buffer */
290 else {
291 /* get more output, looking for header if required */
292 if (gz_fetch(state) == -1)
293 {
294 return -1;
295 }
296 }
297 return 0;
298 }
299
300 /* Read len bytes into buf from file, or less than len up to the end of the
301 input. Return the number of bytes read. If zero is returned, either the
302 end of file was reached, or there was an error. state->err must be
303 consulted in that case to determine which. */
gz_read(gz_statep state,voidp buf,z_size_t len)304 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len)
305 {
306 z_size_t got;
307 unsigned n;
308
309 /* if len is zero, avoid unnecessary operations */
310 if (len == 0)
311 {
312 return 0;
313 }
314
315 /* process a skip request */
316 if (state->seek) {
317 state->seek = 0;
318 if (gz_skip(state, state->skip) == -1)
319 {
320 return 0;
321 }
322 }
323
324 /* get len bytes to buf, or less than len if at the end */
325 got = 0;
326 do {
327 /* set n to the maximum amount of len that fits in an unsigned int */
328 n = (unsigned)-1;
329 if (n > len)
330 {
331 n = (unsigned)len;
332 }
333
334 /* first just try copying data from the output buffer */
335 if (state->x.have) {
336 if (state->x.have < n)
337 {
338 n = state->x.have;
339 }
340 memcpy(buf, state->x.next, n);
341 state->x.next += n;
342 state->x.have -= n;
343 }
344
345 /* output buffer empty -- return if we're at the end of the input */
346 else if (state->eof && state->strm.avail_in == 0) {
347 state->past = 1; /* tried to read past end */
348 break;
349 }
350
351 /* need output data -- for small len or new stream load up our output
352 buffer */
353 else if (state->how == LOOK || n < (state->size << 1)) {
354 /* get more output, looking for header if required */
355 if (gz_fetch(state) == -1)
356 {
357 return 0;
358 }
359 continue; /* no progress yet -- go back to copy above */
360 /* the copy above assures that we will leave with space in the
361 output buffer, allowing at least one gzungetc() to succeed */
362 }
363
364 /* large len -- read directly into user buffer */
365 else if (state->how == COPY) { /* read directly */
366 if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
367 {
368 return 0;
369 }
370 }
371
372 /* large len -- decompress directly into user buffer */
373 else { /* state->how == GZIP */
374 state->strm.avail_out = n;
375 state->strm.next_out = (unsigned char *)buf;
376 if (gz_decomp(state) == -1)
377 {
378 return 0;
379 }
380 n = state->x.have;
381 state->x.have = 0;
382 }
383
384 /* update progress */
385 len -= n;
386 buf = (char *)buf + n;
387 got += n;
388 state->x.pos += n;
389 } while (len);
390
391 /* return number of bytes read into user buffer */
392 return got;
393 }
394
395 /* -- see zlib.h -- */
gzread(gzFile file,voidp buf,unsigned len)396 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len)
397 {
398 gz_statep state;
399
400 /* get internal structure */
401 if (file == NULL)
402 {
403 return -1;
404 }
405 state = (gz_statep)file;
406
407 /* check that we're reading and that there's no (serious) error */
408 if (state->mode != GZ_READ ||
409 (state->err != Z_OK && state->err != Z_BUF_ERROR))
410 {
411 return -1;
412 }
413
414 /* since an int is returned, make sure len fits in one, otherwise return
415 with an error (this avoids a flaw in the interface) */
416 if ((int)len < 0) {
417 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
418 return -1;
419 }
420
421 /* read len or fewer bytes to buf */
422 len = (unsigned)gz_read(state, buf, len);
423
424 /* check for an error */
425 if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
426 {
427 return -1;
428 }
429
430 /* return the number of bytes read (this is assured to fit in an int) */
431 return (int)len;
432 }
433
434 /* -- see zlib.h -- */
gzfread(voidp buf,z_size_t size,z_size_t nitems,gzFile file)435 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file)
436 {
437 z_size_t len;
438 gz_statep state;
439
440 /* get internal structure */
441 if (file == NULL)
442 {
443 return 0;
444 }
445 state = (gz_statep)file;
446
447 /* check that we're reading and that there's no (serious) error */
448 if (state->mode != GZ_READ ||
449 (state->err != Z_OK && state->err != Z_BUF_ERROR))
450 {
451 return 0;
452 }
453
454 /* compute bytes to read -- error on overflow */
455 len = nitems * size;
456 if (size && len / size != nitems) {
457 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
458 return 0;
459 }
460
461 /* read len or fewer bytes to buf, return the number of full items read */
462 return len ? gz_read(state, buf, len) / size : 0;
463 }
464
465 /* -- see zlib.h -- */
466 #ifdef Z_PREFIX_SET
467 # undef z_gzgetc
468 #else
469 # undef gzgetc
470 #endif
gzgetc(gzFile file)471 int ZEXPORT gzgetc(gzFile file)
472 {
473 unsigned char buf[1];
474 gz_statep state;
475
476 /* get internal structure */
477 if (file == NULL)
478 {
479 return -1;
480 }
481 state = (gz_statep)file;
482
483 /* check that we're reading and that there's no (serious) error */
484 if (state->mode != GZ_READ ||
485 (state->err != Z_OK && state->err != Z_BUF_ERROR))
486 {
487 return -1;
488 }
489
490 /* try output buffer (no need to check for skip request) */
491 if (state->x.have) {
492 state->x.have--;
493 state->x.pos++;
494 return *(state->x.next)++;
495 }
496
497 /* nothing there -- try gz_read() */
498 return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
499 }
500
gzgetc_(gzFile file)501 int ZEXPORT gzgetc_(gzFile file)
502 {
503 return gzgetc(file);
504 }
505
506 /* -- see zlib.h -- */
gzungetc(int c,gzFile file)507 int ZEXPORT gzungetc(int c, gzFile file)
508 {
509 gz_statep state;
510
511 /* get internal structure */
512 if (file == NULL)
513 {
514 return -1;
515 }
516 state = (gz_statep)file;
517
518 /* in case this was just opened, set up the input buffer */
519 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
520 {
521 (void)gz_look(state);
522 }
523
524 /* check that we're reading and that there's no (serious) error */
525 if (state->mode != GZ_READ ||
526 (state->err != Z_OK && state->err != Z_BUF_ERROR))
527 {
528 return -1;
529 }
530
531 /* process a skip request */
532 if (state->seek) {
533 state->seek = 0;
534 if (gz_skip(state, state->skip) == -1)
535 {
536 return -1;
537 }
538 }
539
540 /* can't push EOF */
541 if (c < 0)
542 {
543 return -1;
544 }
545
546 /* if output buffer empty, put byte at end (allows more pushing) */
547 if (state->x.have == 0) {
548 state->x.have = 1;
549 state->x.next = state->out + (state->size << 1) - 1;
550 state->x.next[0] = (unsigned char)c;
551 state->x.pos--;
552 state->past = 0;
553 return c;
554 }
555
556 /* if no room, give up (must have already done a gzungetc()) */
557 if (state->x.have == (state->size << 1)) {
558 gz_error(state, Z_DATA_ERROR, "out of room to push characters");
559 return -1;
560 }
561
562 /* slide output data if needed and insert byte before existing data */
563 if (state->x.next == state->out) {
564 unsigned char *src = state->out + state->x.have;
565 unsigned char *dest = state->out + (state->size << 1);
566 while (src > state->out){
567 *--dest = *--src;
568 }
569 state->x.next = dest;
570 }
571 state->x.have++;
572 state->x.next--;
573 state->x.next[0] = (unsigned char)c;
574 state->x.pos--;
575 state->past = 0;
576 return c;
577 }
578
579 /* -- see zlib.h -- */
gzgets(gzFile file,char * buf,int len)580 char * ZEXPORT gzgets(gzFile file, char *buf, int len)
581 {
582 unsigned left, n;
583 char *str;
584 unsigned char *eol;
585 gz_statep state;
586
587 /* check parameters and get internal structure */
588 if (file == NULL || buf == NULL || len < 1)
589 {
590 return NULL;
591 }
592 state = (gz_statep)file;
593
594 /* check that we're reading and that there's no (serious) error */
595 if (state->mode != GZ_READ ||
596 (state->err != Z_OK && state->err != Z_BUF_ERROR))
597 {
598 return NULL;
599 }
600
601 /* process a skip request */
602 if (state->seek) {
603 state->seek = 0;
604 if (gz_skip(state, state->skip) == -1)
605 {
606 return NULL;
607 }
608 }
609
610 /* copy output bytes up to new line or len - 1, whichever comes first --
611 append a terminating zero to the string (we don't check for a zero in
612 the contents, let the user worry about that) */
613 str = buf;
614 left = (unsigned)len - 1;
615 if (left) do {
616 /* assure that something is in the output buffer */
617 if (state->x.have == 0 && gz_fetch(state) == -1)
618 {
619 return NULL; /* error */
620 }
621 if (state->x.have == 0) { /* end of file */
622 state->past = 1; /* read past end */
623 break; /* return what we have */
624 }
625
626 /* look for end-of-line in current output buffer */
627 n = state->x.have > left ? left : state->x.have;
628 eol = (unsigned char *)memchr(state->x.next, '\n', n);
629 if (eol != NULL)
630 {
631 n = (unsigned)(eol - state->x.next) + 1;
632 }
633
634 /* copy through end-of-line, or remainder if not found */
635 memcpy(buf, state->x.next, n);
636 state->x.have -= n;
637 state->x.next += n;
638 state->x.pos += n;
639 left -= n;
640 buf += n;
641 } while (left && eol == NULL);
642
643 /* return terminated string, or if nothing, end of file */
644 if (buf == str)
645 {
646 return NULL;
647 }
648 buf[0] = 0;
649 return str;
650 }
651
652 /* -- see zlib.h -- */
gzdirect(gzFile file)653 int ZEXPORT gzdirect(gzFile file)
654 {
655 gz_statep state;
656
657 /* get internal structure */
658 if (file == NULL)
659 {
660 return 0;
661 }
662 state = (gz_statep)file;
663
664 /* if the state is not known, but we can find out, then do so (this is
665 mainly for right after a gzopen() or gzdopen()) */
666 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
667 {
668 (void)gz_look(state);
669 }
670
671 /* return 1 if transparent, 0 if processing a gzip stream */
672 return state->direct;
673 }
674
675 /* -- see zlib.h -- */
gzclose_r(gzFile file)676 int ZEXPORT gzclose_r(gzFile file)
677 {
678 int ret, err;
679 gz_statep state;
680
681 /* get internal structure */
682 if (file == NULL)
683 {
684 return Z_STREAM_ERROR;
685 }
686 state = (gz_statep)file;
687
688 /* check that we're reading */
689 if (state->mode != GZ_READ)
690 {
691 return Z_STREAM_ERROR;
692 }
693
694 /* free memory and close file */
695 if (state->size) {
696 inflateEnd(&(state->strm));
697 free(state->out);
698 free(state->in);
699 }
700 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
701 gz_error(state, Z_OK, NULL);
702 free(state->path);
703 ret = close(state->fd);
704 free(state);
705 return ret ? Z_ERRNO : err;
706 }
707