• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2018 The Chromium Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the Chromium source repository LICENSE file.
 *
 * A benchmark test harness for measuring decoding performance of gzip or zlib
 * (deflate) encoded compressed data. Given a file containing any data, encode
 * (compress) it into gzip or zlib format and then decode (uncompress). Output
 * the median and maximum encoding and decoding rates in MB/s.
 *
 * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select
 * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those
 * stream wrappers.
 *
 * Note this code can be compiled outside of the Chromium build system against
 * the system zlib (-lz) with g++ or clang++ as follows:
 *
 *   g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz
 */
20 
21 #include <algorithm>
22 #include <chrono>
23 #include <fstream>
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include <memory.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 
33 #include "zlib.h"
34 
// Reports a fatal error and terminates the process.
//
// Writes |error| followed by |code| in parentheses to stderr, then exits
// using |code| as the process exit status. Does not return.
void error_exit(const char* error, int code) {
  fprintf(stderr, "%s (%d)\n", error, code);
  exit(code);
}
39 
// Returns a writable pointer to the first character of |*s|, or nullptr if
// the string is empty.
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &(*s)[0];
}
43 
// A heap buffer together with its byte count and an associated name.
struct Data {
  // Allocates |s| bytes with the non-throwing form of new[], so |data| is
  // null if the allocation fails. |size| is set to |s| either way.
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}

  std::unique_ptr<char[]> data;  // the buffer, or null on allocation failure
  size_t size;                   // number of bytes requested for |data|
  std::string name;              // left empty by the constructor
};
50 
read_file_data_or_exit(const char * name)51 Data read_file_data_or_exit(const char* name) {
52   std::ifstream file(name, std::ios::in | std::ios::binary);
53   if (!file) {
54     perror(name);
55     exit(1);
56   }
57 
58   file.seekg(0, std::ios::end);
59   Data data(file.tellg());
60   file.seekg(0, std::ios::beg);
61 
62   if (file && data.data)
63     file.read(data.data.get(), data.size);
64 
65   if (!file || !data.data || !data.size) {
66     perror((std::string("failed: reading ") + name).c_str());
67     exit(1);
68   }
69 
70   data.name = std::string(name);
71   return data;
72 }
73 
zlib_estimate_compressed_size(size_t input_size)74 size_t zlib_estimate_compressed_size(size_t input_size) {
75   return compressBound(input_size);
76 }
77 
// The stream wrapper placed around the DEFLATE data.
enum zlib_wrapper {
  kWrapperNONE = 0,  // no wrapper chosen: the wrapper helpers reject it
  kWrapperZLIB = 1,  // zlib DEFLATE stream wrapper
  kWrapperGZIP = 2,  // gzip DEFLATE stream wrapper
  kWrapperZRAW = 3,  // no wrapper, raw DEFLATE
};
84 
zlib_stream_wrapper_type(zlib_wrapper type)85 inline int zlib_stream_wrapper_type(zlib_wrapper type) {
86   if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper
87     return MAX_WBITS;
88   if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper
89     return MAX_WBITS + 16;
90   if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE
91     return -MAX_WBITS;
92   error_exit("bad wrapper type", int(type));
93   return 0;
94 }
95 
zlib_wrapper_name(zlib_wrapper type)96 const char* zlib_wrapper_name(zlib_wrapper type) {
97   if (type == kWrapperZLIB)
98     return "ZLIB";
99   if (type == kWrapperGZIP)
100     return "GZIP";
101   if (type == kWrapperZRAW)
102     return "RAW";
103   error_exit("bad wrapper type", int(type));
104   return nullptr;
105 }
106 
107 static int zlib_strategy = Z_DEFAULT_STRATEGY;
108 
zlib_level_strategy_name(int compression_level)109 const char* zlib_level_strategy_name(int compression_level) {
110   if (compression_level == 0)
111     return "";  // strategy is meaningless at level 0
112   if (zlib_strategy == Z_HUFFMAN_ONLY)
113     return "huffman ";
114   if (zlib_strategy == Z_RLE)
115     return "rle ";
116   if (zlib_strategy == Z_DEFAULT_STRATEGY)
117     return "";
118   error_exit("bad strategy", zlib_strategy);
119   return nullptr;
120 }
121 
122 static int zlib_compression_level = Z_DEFAULT_COMPRESSION;
123 
zlib_compress(const zlib_wrapper type,const char * input,const size_t input_size,std::string * output,bool resize_output=false)124 void zlib_compress(
125     const zlib_wrapper type,
126     const char* input,
127     const size_t input_size,
128     std::string* output,
129     bool resize_output = false)
130 {
131   if (resize_output)
132     output->resize(zlib_estimate_compressed_size(input_size));
133   size_t output_size = output->size();
134 
135   z_stream stream;
136   memset(&stream, 0, sizeof(stream));
137 
138   int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED,
139       zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy);
140   if (result != Z_OK)
141     error_exit("deflateInit2 failed", result);
142 
143   stream.next_out = (Bytef*)string_data(output);
144   stream.avail_out = (uInt)output_size;
145   stream.next_in = (z_const Bytef*)input;
146   stream.avail_in = (uInt)input_size;
147 
148   result = deflate(&stream, Z_FINISH);
149   if (stream.avail_in > 0)
150     error_exit("compress: input was not consumed", Z_DATA_ERROR);
151   if (result == Z_STREAM_END)
152     output_size = stream.total_out;
153   result |= deflateEnd(&stream);
154   if (result != Z_STREAM_END)
155     error_exit("compress failed", result);
156 
157   if (resize_output)
158     output->resize(output_size);
159 }
160 
zlib_uncompress(const zlib_wrapper type,const std::string & input,const size_t output_size,std::string * output)161 void zlib_uncompress(
162     const zlib_wrapper type,
163     const std::string& input,
164     const size_t output_size,
165     std::string* output)
166 {
167   z_stream stream;
168   memset(&stream, 0, sizeof(stream));
169 
170   int result = inflateInit2(&stream, zlib_stream_wrapper_type(type));
171   if (result != Z_OK)
172     error_exit("inflateInit2 failed", result);
173 
174   stream.next_out = (Bytef*)string_data(output);
175   stream.avail_out = (uInt)output->size();
176   stream.next_in = (z_const Bytef*)input.data();
177   stream.avail_in = (uInt)input.size();
178 
179   result = inflate(&stream, Z_FINISH);
180   if (stream.total_out != output_size)
181     result = Z_DATA_ERROR;
182   result |= inflateEnd(&stream);
183   if (result == Z_STREAM_END)
184     return;
185 
186   std::string error("uncompress failed: ");
187   if (stream.msg)
188     error.append(stream.msg);
189   error_exit(error.c_str(), result);
190 }
191 
verify_equal(const char * input,size_t size,std::string * output)192 void verify_equal(const char* input, size_t size, std::string* output) {
193   const char* data = string_data(output);
194   if (output->size() == size && !memcmp(data, input, size))
195     return;
196   fprintf(stderr, "uncompressed data does not match the input data\n");
197   exit(3);
198 }
199 
check_file(const Data & file,zlib_wrapper type,int mode)200 void check_file(const Data& file, zlib_wrapper type, int mode) {
201   printf("%s %d %s%s\n", zlib_wrapper_name(type), zlib_compression_level,
202     zlib_level_strategy_name(zlib_compression_level), file.name.c_str());
203 
204   // Compress the file data.
205   std::string compressed;
206   zlib_compress(type, file.data.get(), file.size, &compressed, true);
207 
208   // Output compressed data integrity check: the data crc32.
209   unsigned long check = crc32_z(0, Z_NULL, 0);
210   const Bytef* data = (const Bytef*)compressed.data();
211   static_assert(sizeof(z_size_t) == sizeof(size_t), "z_size_t size");
212   check = crc32_z(check, data, (z_size_t)compressed.size());
213 
214   const size_t compressed_length = compressed.size();
215   printf("data crc32 %.8lx length %zu\n", check, compressed_length);
216 
217   // Output gzip or zlib DEFLATE stream internal check data.
218   if (type == kWrapperGZIP) {
219     uint32_t prev_word, last_word;
220     data += compressed_length - 8;
221     prev_word = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
222     data += 4;  // last compressed data word
223     last_word = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
224     printf("gzip crc32 %.8x length %u\n", prev_word, last_word);
225   } else if (type == kWrapperZLIB) {
226     uint32_t last_word;
227     data += compressed_length - 4;
228     last_word = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
229     printf("zlib adler %.8x\n", last_word);
230   }
231 
232   if (mode == 2)  // --check-binary: output compressed data.
233     fwrite(compressed.data(), compressed_length, 1, stdout);
234 
235   if (fflush(stdout), ferror(stdout))
236     error_exit("check file: error writing output", 3);
237 }
238 
zlib_file(const char * name,zlib_wrapper type,int width,int check)239 void zlib_file(const char* name, zlib_wrapper type, int width, int check) {
240   /*
241    * Read the file data.
242    */
243   struct Data file = read_file_data_or_exit(name);
244   const int length = static_cast<int>(file.size);
245   const char* data = file.data.get();
246 
247   /*
248    * Compress file: report output data checks and return.
249    */
250   if (check) {
251     file.name = file.name.substr(file.name.find_last_of("/\\") + 1);
252     check_file(file, type, check);
253     return;
254   }
255 
256   /*
257    * Report compression strategy and file name.
258    */
259   const char* strategy = zlib_level_strategy_name(zlib_compression_level);
260   printf("%s%-40s :\n", strategy, name);
261 
262   /*
263    * Chop the data into blocks.
264    */
265   const int block_size = 1 << 20;
266   const int blocks = (length + block_size - 1) / block_size;
267 
268   std::vector<const char*> input(blocks);
269   std::vector<size_t> input_length(blocks);
270   std::vector<std::string> compressed(blocks);
271   std::vector<std::string> output(blocks);
272 
273   for (int b = 0; b < blocks; ++b) {
274     int input_start = b * block_size;
275     int input_limit = std::min<int>((b + 1) * block_size, length);
276     input[b] = data + input_start;
277     input_length[b] = input_limit - input_start;
278   }
279 
280   /*
281    * Run the zlib compress/uncompress loop a few times with |repeats| to
282    * process about 10MB of data if the length is small relative to 10MB.
283    * If length is large relative to 10MB, process the data once.
284    */
285   const int mega_byte = 1024 * 1024;
286   const int repeats = (10 * mega_byte + length) / (length + 1);
287   const int runs = 5;
288   double ctime[runs];
289   double utime[runs];
290 
291   for (int run = 0; run < runs; ++run) {
292     const auto now = [] { return std::chrono::steady_clock::now(); };
293 
294     // Pre-grow the output buffer so we don't measure string resize time.
295     for (int b = 0; b < blocks; ++b)
296       compressed[b].resize(zlib_estimate_compressed_size(block_size));
297 
298     auto start = now();
299     for (int b = 0; b < blocks; ++b)
300       for (int r = 0; r < repeats; ++r)
301         zlib_compress(type, input[b], input_length[b], &compressed[b]);
302     ctime[run] = std::chrono::duration<double>(now() - start).count();
303 
304     // Compress again, resizing compressed, so we don't leave junk at the
305     // end of the compressed string that could confuse zlib_uncompress().
306     for (int b = 0; b < blocks; ++b)
307       zlib_compress(type, input[b], input_length[b], &compressed[b], true);
308 
309     for (int b = 0; b < blocks; ++b)
310       output[b].resize(input_length[b]);
311 
312     start = now();
313     for (int r = 0; r < repeats; ++r)
314       for (int b = 0; b < blocks; ++b)
315         zlib_uncompress(type, compressed[b], input_length[b], &output[b]);
316     utime[run] = std::chrono::duration<double>(now() - start).count();
317 
318     for (int b = 0; b < blocks; ++b)
319       verify_equal(input[b], input_length[b], &output[b]);
320   }
321 
322   /*
323    * Output the median/maximum compress/uncompress rates in MB/s.
324    */
325   size_t output_length = 0;
326   for (size_t i = 0; i < compressed.size(); ++i)
327     output_length += compressed[i].size();
328 
329   std::sort(ctime, ctime + runs);
330   std::sort(utime, utime + runs);
331 
332   double deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2];
333   double inflate_rate_med = length * repeats / mega_byte / utime[runs / 2];
334   double deflate_rate_max = length * repeats / mega_byte / ctime[0];
335   double inflate_rate_max = length * repeats / mega_byte / utime[0];
336 
337   // type, block size, compression ratio, etc
338   printf("%s: [b %dM] bytes %*d -> %*u %4.2f%%",
339     zlib_wrapper_name(type), block_size / (1 << 20), width, length, width,
340     unsigned(output_length), output_length * 100.0 / length);
341 
342   // compress / uncompress median (max) rates
343   printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n",
344     deflate_rate_med, deflate_rate_max, inflate_rate_med, inflate_rate_max);
345 }
346 
// Index into argv of the next command line argument to be parsed. The
// helpers below advance it as they consume arguments.
static int argn = 1;

// If the next argument equals |option| exactly, consumes it and returns it.
// Otherwise, or if no arguments are left, returns nullptr and consumes
// nothing.
char* get_option(int argc, char* argv[], const char* option) {
  if (argn >= argc)
    return nullptr;
  if (strcmp(argv[argn], option) != 0)
    return nullptr;
  return argv[argn++];
}

// Parses the next argument as a compression level.
//
// Returns true, stores the level in |value| and consumes the argument when
// it starts with a digit and its atoi() value lies in [0, 9]. Returns false
// otherwise: a missing argument leaves |value| untouched, an argument that
// does not start with a digit sets |value| to -1 and is not consumed, and an
// out of range number is consumed and stored.
bool get_compression(int argc, char* argv[], int& value) {
  // A missing argument must fail even if an earlier --compression already
  // stored a valid level in |value|.
  if (argn >= argc)
    return false;

  // isdigit() is only defined for unsigned char values (and EOF).
  const unsigned char first = static_cast<unsigned char>(argv[argn][0]);
  if (!isdigit(first)) {
    value = -1;
    return false;
  }

  value = atoi(argv[argn++]);
  return value >= 0 && value <= 9;
}

// Parses the next argument with atoi() into |value| and consumes it. If no
// argument is left, |value| and |argn| are unchanged, instead of reading
// the null argv[argc] entry.
void get_field_width(int argc, char* argv[], int& value) {
  if (argn >= argc)
    return;
  value = atoi(argv[argn++]);
}
364 
usage_exit(const char * program)365 void usage_exit(const char* program) {
366   static auto* options = "gzip|zlib|raw"
367     " [--compression 0:9] [--huffman|--rle] [--field width] [--check]";
368   printf("usage: %s %s files ...\n", program, options);
369   printf("zlib version: %s\n", ZLIB_VERSION);
370   exit(1);
371 }
372 
main(int argc,char * argv[])373 int main(int argc, char* argv[]) {
374   zlib_wrapper type;
375   if (get_option(argc, argv, "zlib"))
376     type = kWrapperZLIB;
377   else if (get_option(argc, argv, "gzip"))
378     type = kWrapperGZIP;
379   else if (get_option(argc, argv, "raw"))
380     type = kWrapperZRAW;
381   else
382     usage_exit(argv[0]);
383 
384   int size_field_width = 0;
385   int file_check = 0;
386 
387   while (argn < argc && argv[argn][0] == '-') {
388     if (get_option(argc, argv, "--compression")) {
389       if (!get_compression(argc, argv, zlib_compression_level))
390         usage_exit(argv[0]);
391     } else if (get_option(argc, argv, "--huffman")) {
392       zlib_strategy = Z_HUFFMAN_ONLY;
393     } else if (get_option(argc, argv, "--rle")) {
394       zlib_strategy = Z_RLE;
395     } else if (get_option(argc, argv, "--check")) {
396       file_check = 1;
397     } else if (get_option(argc, argv, "--check-binary")) {
398       file_check = 2;
399     } else if (get_option(argc, argv, "--field")) {
400       get_field_width(argc, argv, size_field_width);
401     } else {
402       usage_exit(argv[0]);
403     }
404   }
405 
406   if (argn >= argc)
407     usage_exit(argv[0]);
408 
409   if (size_field_width < 6)
410     size_field_width = 6;
411   while (argn < argc)
412     zlib_file(argv[argn++], type, size_field_width, file_check);
413 
414   return 0;
415 }
416