1 /* fitblk.c: example of fitting compressed output to a specified size
2 Not copyrighted -- provided to the public domain
3 Version 1.1 25 November 2004 Mark Adler */
4
5 /* Version history:
6 1.0 24 Nov 2004 First version
7 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
8 Use fixed-size, stack-allocated raw buffers
9 Simplify code moving compression to subroutines
10 Use assert() for internal errors
11 Add detailed description of approach
12 */
13
14 /* Approach to just fitting a requested compressed size:
15
16 fitblk performs three compression passes on a portion of the input
17 data in order to determine how much of that input will compress to
18 nearly the requested output block size. The first pass generates
19 enough deflate blocks to produce output to fill the requested
20 output size plus a specified excess amount (see the EXCESS define
21 below). The last deflate block may go quite a bit past that, but
22 is discarded. The second pass decompresses and recompresses just
23 the compressed data that fit in the requested plus excess sized
24 buffer. The deflate process is terminated after that amount of
25 input, which is less than the amount consumed on the first pass.
26 The last deflate block of the result will be of a comparable size
27 to the final product, so that the header for that deflate block and
28 the compression ratio for that block will be about the same as in
29 the final product. The third compression pass decompresses the
30 result of the second step, but only the compressed data up to the
31 requested size minus an amount to allow the compressed stream to
32 complete (see the MARGIN define below). That will result in a
33 final compressed stream whose length is less than or equal to the
34 requested size. Assuming sufficient input and a requested size
35 greater than a few hundred bytes, the shortfall will typically be
36 less than ten bytes.
37
38 If the input is short enough that the first compression completes
39 before filling the requested output size, then that compressed
40 stream is returned with no recompression.
41
42 EXCESS is chosen to be just greater than the shortfall seen in a
43 two pass approach similar to the above. That shortfall is due to
44 the last deflate block compressing more efficiently with a smaller
45 header on the second pass. EXCESS is set to be large enough so
46 that there is enough uncompressed data for the second pass to fill
47 out the requested size, and small enough so that the final deflate
48 block of the second pass will be close in size to the final deflate
49 block of the third and final pass. MARGIN is chosen to be just
50 large enough to assure that the final compression has enough room
51 to complete in all cases.
52 */
53
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
58
59 #define local static
60
61 /* print nastygram and leave */
quit(char * why)62 local void quit(char *why)
63 {
64 fprintf(stderr, "fitblk abort: %s\n", why);
65 exit(1);
66 }
67
68 #define RAWLEN 4096 /* intermediate uncompressed buffer size */
69
70 /* compress from file to def until provided buffer is full or end of
71 input reached; return last deflate() return value, or Z_ERRNO if
72 there was read error on the file */
partcompress(FILE * in,z_streamp def)73 local int partcompress(FILE *in, z_streamp def)
74 {
75 int ret, flush;
76 unsigned char raw[RAWLEN];
77
78 flush = Z_NO_FLUSH;
79 do {
80 def->avail_in = fread(raw, 1, RAWLEN, in);
81 if (ferror(in))
82 return Z_ERRNO;
83 def->next_in = raw;
84 if (feof(in))
85 flush = Z_FINISH;
86 ret = deflate(def, flush);
87 assert(ret != Z_STREAM_ERROR);
88 } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
89 return ret;
90 }
91
92 /* recompress from inf's input to def's output; the input for inf and
93 the output for def are set in those structures before calling;
94 return last deflate() return value, or Z_MEM_ERROR if inflate()
95 was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)96 local int recompress(z_streamp inf, z_streamp def)
97 {
98 int ret, flush;
99 unsigned char raw[RAWLEN];
100
101 flush = Z_NO_FLUSH;
102 do {
103 /* decompress */
104 inf->avail_out = RAWLEN;
105 inf->next_out = raw;
106 ret = inflate(inf, Z_NO_FLUSH);
107 assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
108 ret != Z_NEED_DICT);
109 if (ret == Z_MEM_ERROR)
110 return ret;
111
112 /* compress what was decompressed until done or no room */
113 def->avail_in = RAWLEN - inf->avail_out;
114 def->next_in = raw;
115 if (inf->avail_out != 0)
116 flush = Z_FINISH;
117 ret = deflate(def, flush);
118 assert(ret != Z_STREAM_ERROR);
119 } while (ret != Z_STREAM_END && def->avail_out != 0);
120 return ret;
121 }
122
123 #define EXCESS 256 /* empirically determined stream overage */
124 #define MARGIN 8 /* amount to back off for completion */
125
126 /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)127 int main(int argc, char **argv)
128 {
129 int ret; /* return code */
130 unsigned size; /* requested fixed output block size */
131 unsigned have; /* bytes written by deflate() call */
132 unsigned char *blk; /* intermediate and final stream */
133 unsigned char *tmp; /* close to desired size stream */
134 z_stream def, inf; /* zlib deflate and inflate states */
135
136 /* get requested output size */
137 if (argc != 2)
138 quit("need one argument: size of output block");
139 ret = strtol(argv[1], argv + 1, 10);
140 if (argv[1][0] != 0)
141 quit("argument must be a number");
142 if (ret < 8) /* 8 is minimum zlib stream size */
143 quit("need positive size of 8 or greater");
144 size = (unsigned)ret;
145
146 /* allocate memory for buffers and compression engine */
147 blk = malloc(size + EXCESS);
148 def.zalloc = Z_NULL;
149 def.zfree = Z_NULL;
150 def.opaque = Z_NULL;
151 ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
152 if (ret != Z_OK || blk == NULL)
153 quit("out of memory");
154
155 /* compress from stdin until output full, or no more input */
156 def.avail_out = size + EXCESS;
157 def.next_out = blk;
158 ret = partcompress(stdin, &def);
159 if (ret == Z_ERRNO)
160 quit("error reading input");
161
162 /* if it all fit, then size was undersubscribed -- done! */
163 if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
164 /* write block to stdout */
165 have = size + EXCESS - def.avail_out;
166 if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
167 quit("error writing output");
168
169 /* clean up and print results to stderr */
170 ret = deflateEnd(&def);
171 assert(ret != Z_STREAM_ERROR);
172 free(blk);
173 fprintf(stderr,
174 "%u bytes unused out of %u requested (all input)\n",
175 size - have, size);
176 return 0;
177 }
178
179 /* it didn't all fit -- set up for recompression */
180 inf.zalloc = Z_NULL;
181 inf.zfree = Z_NULL;
182 inf.opaque = Z_NULL;
183 inf.avail_in = 0;
184 inf.next_in = Z_NULL;
185 ret = inflateInit(&inf);
186 tmp = malloc(size + EXCESS);
187 if (ret != Z_OK || tmp == NULL)
188 quit("out of memory");
189 ret = deflateReset(&def);
190 assert(ret != Z_STREAM_ERROR);
191
192 /* do first recompression close to the right amount */
193 inf.avail_in = size + EXCESS;
194 inf.next_in = blk;
195 def.avail_out = size + EXCESS;
196 def.next_out = tmp;
197 ret = recompress(&inf, &def);
198 if (ret == Z_MEM_ERROR)
199 quit("out of memory");
200
201 /* set up for next reocmpression */
202 ret = inflateReset(&inf);
203 assert(ret != Z_STREAM_ERROR);
204 ret = deflateReset(&def);
205 assert(ret != Z_STREAM_ERROR);
206
207 /* do second and final recompression (third compression) */
208 inf.avail_in = size - MARGIN; /* assure stream will complete */
209 inf.next_in = tmp;
210 def.avail_out = size;
211 def.next_out = blk;
212 ret = recompress(&inf, &def);
213 if (ret == Z_MEM_ERROR)
214 quit("out of memory");
215 assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
216
217 /* done -- write block to stdout */
218 have = size - def.avail_out;
219 if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
220 quit("error writing output");
221
222 /* clean up and print results to stderr */
223 free(tmp);
224 ret = inflateEnd(&inf);
225 assert(ret != Z_STREAM_ERROR);
226 ret = deflateEnd(&def);
227 assert(ret != Z_STREAM_ERROR);
228 free(blk);
229 fprintf(stderr,
230 "%u bytes unused out of %u requested (%lu input)\n",
231 size - have, size, def.total_in);
232 return 0;
233 }
234