• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2018 The Chromium Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the Chromium source repository LICENSE file.
 *
 * A benchmark test harness for measuring decoding performance of gzip or zlib
 * (deflate) encoded compressed data. Given a file containing any data, encode
 * (compress) it into gzip or zlib format and then decode (uncompress). Output
 * the median and maximum encoding and decoding rates in MB/s.
 *
 * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select
 * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those
 * stream wrappers.
 *
 * Note this code can be compiled outside of the Chromium build system against
 * the system zlib (-lz) with g++ or clang++ as follows:
 *
 *   g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz
 */
20 
#include <algorithm>
#include <chrono>
#include <fstream>
#include <memory>
#include <new>
#include <string>
#include <vector>

#include <ctype.h>
#include <memory.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "zlib.h"
34 
// Prints |error| followed by |code| in parentheses to stderr, then ends the
// process with |code| as its exit status. This function does not return.
void error_exit(const char* error, int code) {
  const int exit_status = code;
  fprintf(stderr, "%s (%d)\n", error, exit_status);
  exit(exit_status);
}
39 
// Returns a writable pointer to the first character of |s|, or nullptr when
// |s| holds no characters.
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &(*s)[0];
}
43 
// A named byte buffer: |data| points at |size| chars obtained from a
// non-throwing new[], so |data| is null when the allocation failed.
struct Data {
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}
  std::unique_ptr<char[]> data;  // buffer storage, released automatically
  size_t size;                   // number of chars requested for |data|
  std::string name;              // label for the buffer, empty until assigned
};
50 
read_file_data_or_exit(const char * name)51 Data read_file_data_or_exit(const char* name) {
52   std::ifstream file(name, std::ios::in | std::ios::binary);
53   if (!file) {
54     perror(name);
55     exit(1);
56   }
57 
58   file.seekg(0, std::ios::end);
59   Data data(file.tellg());
60   file.seekg(0, std::ios::beg);
61 
62   if (file && data.data)
63     file.read(data.data.get(), data.size);
64 
65   if (!file || !data.data || !data.size) {
66     perror((std::string("failed: reading ") + name).c_str());
67     exit(1);
68   }
69 
70   data.name = std::string(name);
71   return data;
72 }
73 
zlib_estimate_compressed_size(size_t input_size)74 size_t zlib_estimate_compressed_size(size_t input_size) {
75   return compressBound(input_size);
76 }
77 
// Selects the stream wrapper placed around the DEFLATE data.
enum zlib_wrapper {
  kWrapperNONE = 0,  // no wrapper selected; rejected by the helpers below
  kWrapperZLIB = 1,  // zlib stream wrapper
  kWrapperGZIP = 2,  // gzip stream wrapper
  kWrapperZRAW = 3,  // raw DEFLATE, no wrapper
};
84 
zlib_stream_wrapper_type(zlib_wrapper type)85 inline int zlib_stream_wrapper_type(zlib_wrapper type) {
86   if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper
87     return MAX_WBITS;
88   if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper
89     return MAX_WBITS + 16;
90   if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE
91     return -MAX_WBITS;
92   error_exit("bad wrapper type", int(type));
93   return 0;
94 }
95 
zlib_wrapper_name(zlib_wrapper type)96 const char* zlib_wrapper_name(zlib_wrapper type) {
97   if (type == kWrapperZLIB)
98     return "ZLIB";
99   if (type == kWrapperGZIP)
100     return "GZIP";
101   if (type == kWrapperZRAW)
102     return "RAW";
103   error_exit("bad wrapper type", int(type));
104   return nullptr;
105 }
106 
107 static int zlib_strategy = Z_DEFAULT_STRATEGY;
108 
zlib_level_strategy_name(int compression_level)109 const char* zlib_level_strategy_name(int compression_level) {
110   if (compression_level == 0)
111     return "";  // strategy is meaningless at level 0
112   if (zlib_strategy == Z_HUFFMAN_ONLY)
113     return "huffman ";
114   if (zlib_strategy == Z_RLE)
115     return "rle ";
116   if (zlib_strategy == Z_DEFAULT_STRATEGY)
117     return "";
118   error_exit("bad strategy", zlib_strategy);
119   return nullptr;
120 }
121 
122 static int zlib_compression_level = Z_DEFAULT_COMPRESSION;
123 
zlib_compress(const zlib_wrapper type,const char * input,const size_t input_size,std::string * output,bool resize_output=false)124 void zlib_compress(
125     const zlib_wrapper type,
126     const char* input,
127     const size_t input_size,
128     std::string* output,
129     bool resize_output = false)
130 {
131   if (resize_output)
132     output->resize(zlib_estimate_compressed_size(input_size));
133   size_t output_size = output->size();
134 
135   z_stream stream;
136   memset(&stream, 0, sizeof(stream));
137 
138   int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED,
139       zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy);
140   if (result != Z_OK)
141     error_exit("deflateInit2 failed", result);
142 
143   stream.next_out = (Bytef*)string_data(output);
144   stream.avail_out = (uInt)output_size;
145   stream.next_in = (z_const Bytef*)input;
146   stream.avail_in = (uInt)input_size;
147 
148   result = deflate(&stream, Z_FINISH);
149   if (stream.avail_in > 0)
150     error_exit("compress: input was not consumed", Z_DATA_ERROR);
151   if (result == Z_STREAM_END)
152     output_size = stream.total_out;
153   result |= deflateEnd(&stream);
154   if (result != Z_STREAM_END)
155     error_exit("compress failed", result);
156 
157   if (resize_output)
158     output->resize(output_size);
159 }
160 
zlib_uncompress(const zlib_wrapper type,const std::string & input,const size_t output_size,std::string * output)161 void zlib_uncompress(
162     const zlib_wrapper type,
163     const std::string& input,
164     const size_t output_size,
165     std::string* output)
166 {
167   z_stream stream;
168   memset(&stream, 0, sizeof(stream));
169 
170   int result = inflateInit2(&stream, zlib_stream_wrapper_type(type));
171   if (result != Z_OK)
172     error_exit("inflateInit2 failed", result);
173 
174   stream.next_out = (Bytef*)string_data(output);
175   stream.avail_out = (uInt)output->size();
176   stream.next_in = (z_const Bytef*)input.data();
177   stream.avail_in = (uInt)input.size();
178 
179   result = inflate(&stream, Z_FINISH);
180   if (stream.total_out != output_size)
181     result = Z_DATA_ERROR;
182   result |= inflateEnd(&stream);
183   if (result == Z_STREAM_END)
184     return;
185 
186   std::string error("uncompress failed: ");
187   if (stream.msg)
188     error.append(stream.msg);
189   error_exit(error.c_str(), result);
190 }
191 
// Checks that |output| holds exactly the |size| bytes found at |input|.
// Returns normally on a match; otherwise prints a message to stderr and exits
// with status 3.
void verify_equal(const char* input, size_t size, std::string* output) {
  const bool same_size = output->size() == size;
  // Skip memcmp() for zero bytes: the pointers may be null in that case and
  // passing a null pointer to memcmp() is undefined even with a length of 0.
  if (same_size && (size == 0 || !memcmp(output->data(), input, size)))
    return;
  fprintf(stderr, "uncompressed data does not match the input data\n");
  exit(3);
}
199 
check_file(const Data & file,zlib_wrapper type,int mode)200 void check_file(const Data& file, zlib_wrapper type, int mode) {
201   printf("%s %d %s%s\n", zlib_wrapper_name(type), zlib_compression_level,
202     zlib_level_strategy_name(zlib_compression_level), file.name.c_str());
203 
204   // Compress the file data.
205   std::string compressed;
206   zlib_compress(type, file.data.get(), file.size, &compressed, true);
207 
208   // Output compressed data integrity check: the data crc32.
209   unsigned long check = crc32_z(0, Z_NULL, 0);
210   const Bytef* data = (const Bytef*)compressed.data();
211   static_assert(sizeof(z_size_t) == sizeof(size_t), "z_size_t size");
212   check = crc32_z(check, data, (z_size_t)compressed.size());
213 
214   const size_t compressed_length = compressed.size();
215   printf("data crc32 %.8lx length %zu\n", check, compressed_length);
216 
217   // Output gzip or zlib DEFLATE stream internal check data.
218   if (type == kWrapperGZIP) {
219     uint32_t prev_word, last_word;
220     data += compressed_length - 8;
221     prev_word = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
222     data += 4;  // last compressed data word
223     last_word = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
224     printf("gzip crc32 %.8x length %u\n", prev_word, last_word);
225   } else if (type == kWrapperZLIB) {
226     uint32_t last_word;
227     data += compressed_length - 4;
228     last_word = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
229     printf("zlib adler %.8x\n", last_word);
230   }
231 
232   if (mode == 2)  // --check-binary: output compressed data.
233     fwrite(compressed.data(), compressed_length, 1, stdout);
234 
235   if (fflush(stdout), ferror(stdout))
236     error_exit("check file: error writing output", 3);
237 }
238 
zlib_file(const char * name,zlib_wrapper type,int width,int check,bool output_csv_format)239 void zlib_file(const char* name,
240                zlib_wrapper type,
241                int width,
242                int check,
243                bool output_csv_format) {
244   /*
245    * Read the file data.
246    */
247   struct Data file = read_file_data_or_exit(name);
248   const int length = static_cast<int>(file.size);
249   const char* data = file.data.get();
250 
251   /*
252    * Compress file: report output data checks and return.
253    */
254   if (check) {
255     file.name = file.name.substr(file.name.find_last_of("/\\") + 1);
256     check_file(file, type, check);
257     return;
258   }
259 
260   /*
261    * Report compression strategy and file name.
262    */
263   const char* strategy = zlib_level_strategy_name(zlib_compression_level);
264   if (!output_csv_format) {
265     printf("%s%-40s :\n", strategy, name);
266   }
267 
268   /*
269    * Chop the data into blocks.
270    */
271   const int block_size = 1 << 20;
272   const int blocks = (length + block_size - 1) / block_size;
273 
274   std::vector<const char*> input(blocks);
275   std::vector<size_t> input_length(blocks);
276   std::vector<std::string> compressed(blocks);
277   std::vector<std::string> output(blocks);
278 
279   for (int b = 0; b < blocks; ++b) {
280     int input_start = b * block_size;
281     int input_limit = std::min<int>((b + 1) * block_size, length);
282     input[b] = data + input_start;
283     input_length[b] = input_limit - input_start;
284   }
285 
286   /*
287    * Run the zlib compress/uncompress loop a few times with |repeats| to
288    * process about 10MB of data if the length is small relative to 10MB.
289    * If length is large relative to 10MB, process the data once.
290    */
291   const int mega_byte = 1024 * 1024;
292   const int repeats = (10 * mega_byte + length) / (length + 1);
293   const int runs = 5;
294   double ctime[runs];
295   double utime[runs];
296 
297   for (int run = 0; run < runs; ++run) {
298     const auto now = [] { return std::chrono::steady_clock::now(); };
299 
300     // Pre-grow the output buffer so we don't measure string resize time.
301     for (int b = 0; b < blocks; ++b)
302       compressed[b].resize(zlib_estimate_compressed_size(block_size));
303 
304     auto start = now();
305     for (int b = 0; b < blocks; ++b)
306       for (int r = 0; r < repeats; ++r)
307         zlib_compress(type, input[b], input_length[b], &compressed[b]);
308     ctime[run] = std::chrono::duration<double>(now() - start).count();
309 
310     // Compress again, resizing compressed, so we don't leave junk at the
311     // end of the compressed string that could confuse zlib_uncompress().
312     for (int b = 0; b < blocks; ++b)
313       zlib_compress(type, input[b], input_length[b], &compressed[b], true);
314 
315     for (int b = 0; b < blocks; ++b)
316       output[b].resize(input_length[b]);
317 
318     start = now();
319     for (int r = 0; r < repeats; ++r)
320       for (int b = 0; b < blocks; ++b)
321         zlib_uncompress(type, compressed[b], input_length[b], &output[b]);
322     utime[run] = std::chrono::duration<double>(now() - start).count();
323 
324     for (int b = 0; b < blocks; ++b)
325       verify_equal(input[b], input_length[b], &output[b]);
326   }
327 
328   /*
329    * Output the median/maximum compress/uncompress rates in MB/s.
330    */
331   size_t output_length = 0;
332   for (size_t i = 0; i < compressed.size(); ++i)
333     output_length += compressed[i].size();
334 
335   std::sort(ctime, ctime + runs);
336   std::sort(utime, utime + runs);
337 
338   double deflate_rate_med, inflate_rate_med, deflate_rate_max, inflate_rate_max;
339   deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2];
340   inflate_rate_med = length * repeats / mega_byte / utime[runs / 2];
341   deflate_rate_max = length * repeats / mega_byte / ctime[0];
342   inflate_rate_max = length * repeats / mega_byte / utime[0];
343   double compress_ratio = output_length * 100.0 / length;
344 
345   if (!output_csv_format) {
346     // type, block size, compression ratio, etc
347     printf("%s: [b %dM] bytes %*d -> %*u %4.2f%%", zlib_wrapper_name(type),
348            block_size / (1 << 20), width, length, width,
349            unsigned(output_length), compress_ratio);
350 
351     // compress / uncompress median (max) rates
352     printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n",
353            deflate_rate_med, deflate_rate_max, inflate_rate_med,
354            inflate_rate_max);
355   } else {
356     printf("%s\t%.5lf\t%.5lf\t%.5lf\t%.5lf\t%.5lf\n", name, deflate_rate_med,
357            inflate_rate_med, deflate_rate_max, inflate_rate_max,
358            compress_ratio);
359   }
360 }
361 
// Index of the next command line argument to examine; argv[0] is skipped.
static int argn = 1;

// If the next unread argument equals |option|, consumes it and returns it.
// Otherwise, or when no arguments remain, returns nullptr and consumes
// nothing.
char* get_option(int argc, char* argv[], const char* option) {
  if (argn >= argc)
    return nullptr;

  char* const candidate = argv[argn];
  if (strcmp(candidate, option) != 0)
    return nullptr;

  ++argn;
  return candidate;
}
369 
get_compression(int argc,char * argv[],int & value)370 bool get_compression(int argc, char* argv[], int& value) {
371   if (argn < argc)
372     value = isdigit(argv[argn][0]) ? atoi(argv[argn++]) : -1;
373   return value >= 0 && value <= 9;
374 }
375 
get_field_width(int argc,char * argv[],int & value)376 void get_field_width(int argc, char* argv[], int& value) {
377   value = atoi(argv[argn++]);
378 }
379 
usage_exit(const char * program)380 void usage_exit(const char* program) {
381   static auto* options =
382       "gzip|zlib|raw"
383       " [--compression 0:9] [--huffman|--rle] [--field width] [--check]"
384       " [--csv]";
385   printf("usage: %s %s files ...\n", program, options);
386   printf("zlib version: %s\n", ZLIB_VERSION);
387   exit(1);
388 }
389 
main(int argc,char * argv[])390 int main(int argc, char* argv[]) {
391   zlib_wrapper type;
392   if (get_option(argc, argv, "zlib"))
393     type = kWrapperZLIB;
394   else if (get_option(argc, argv, "gzip"))
395     type = kWrapperGZIP;
396   else if (get_option(argc, argv, "raw"))
397     type = kWrapperZRAW;
398   else
399     usage_exit(argv[0]);
400 
401   int size_field_width = 0;
402   int file_check = 0;
403   bool output_csv = false;
404   while (argn < argc && argv[argn][0] == '-') {
405     if (get_option(argc, argv, "--compression")) {
406       if (!get_compression(argc, argv, zlib_compression_level))
407         usage_exit(argv[0]);
408     } else if (get_option(argc, argv, "--huffman")) {
409       zlib_strategy = Z_HUFFMAN_ONLY;
410     } else if (get_option(argc, argv, "--rle")) {
411       zlib_strategy = Z_RLE;
412     } else if (get_option(argc, argv, "--check")) {
413       file_check = 1;
414     } else if (get_option(argc, argv, "--check-binary")) {
415       file_check = 2;
416     } else if (get_option(argc, argv, "--field")) {
417       get_field_width(argc, argv, size_field_width);
418     } else if (get_option(argc, argv, "--csv")) {
419       output_csv = true;
420       printf(
421           "filename\tcompression\tdecompression\tcomp_max\t"
422           "decomp_max\tcompress_ratio\n");
423     } else {
424       usage_exit(argv[0]);
425     }
426   }
427 
428   if (argn >= argc)
429     usage_exit(argv[0]);
430 
431   if (size_field_width < 6)
432     size_field_width = 6;
433   while (argn < argc) {
434     zlib_file(argv[argn++], type, size_field_width, file_check, output_csv);
435   }
436 
437   return 0;
438 }
439