• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""A tool for running diffing tools and measuring patch sizes."""
9
10import argparse
11import logging
12import os
13import subprocess
14import sys
15import tempfile
16
17
class Error(Exception):
  """Puffin general processing error.

  Raised by this script when a corpus directory is missing, when a source file
  has no counterpart in the target corpus, or when a diffing command fails.
  """
20
21
def ParseArguments(argv):
  """Parses and validates command line arguments.

  Args:
    argv: command line arguments to parse (without the program name).

  Returns:
    The argparse.Namespace holding the parsed values: src_corpus, tgt_corpus
    and debug.

  Raises:
    Error: if either corpus directory was not given or is not an existing
      directory.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--src-corpus', metavar='DIR',
                      help='The source corpus directory with compressed files.')
  parser.add_argument('--tgt-corpus', metavar='DIR',
                      help='The target corpus directory with compressed files.')
  parser.add_argument('--debug', action='store_true',
                      help='Turns on verbosity.')

  # Parse command-line arguments.
  args = parser.parse_args(argv)

  # Neither flag is marked as required for argparse, so an omitted corpus
  # arrives here as None and is rejected together with non-directory paths.
  for corpus in (args.src_corpus, args.tgt_corpus):
    if not corpus or not os.path.isdir(corpus):
      raise Error('Corpus directory {} is non-existent or inaccessible'
                  .format(corpus))
  return args
48
49
def _ListFiles(directory):
  """Returns the sorted paths of the regular files directly in |directory|."""
  paths = (os.path.join(directory, name) for name in os.listdir(directory))
  # os.listdir() returns entries in arbitrary order; sort so that files are
  # processed (and reported) in the same order on every run.
  return sorted(path for path in paths if os.path.isfile(path))


def main(argv):
  """Measures bsdiff and puffdiff patch sizes for every file in a corpus.

  For each regular file in the source corpus, generates a patch to the
  same-named file in the target corpus with both `puffin` (puffdiff operation)
  and `bsdiff`, then logs the source, target and patch sizes. The sizes are
  logged at DEBUG level, so with the default logging configuration they are
  only emitted when --debug is passed.

  Args:
    argv: the full command line, including the program name at index 0.

  Returns:
    0 on success.

  Raises:
    Error: if ParseArguments rejects the corpus directories, a source file has
      no same-named file in the target corpus, or one of the diffing commands
      exits with a non-zero status.
  """
  args = ParseArguments(argv[1:])

  if args.debug:
    logging.getLogger().setLevel(logging.DEBUG)

  # Construct list of appropriate files.
  src_files = _ListFiles(args.src_corpus)
  tgt_files = _ListFiles(args.tgt_corpus)

  # Check if all files in src_files have a target file in tgt_files.
  files_mismatch = (set(map(os.path.basename, src_files)) -
                    set(map(os.path.basename, tgt_files)))
  if files_mismatch:
    raise Error('Target files {} do not exist in corpus: {}'
                .format(files_mismatch, args.tgt_corpus))

  for src in src_files:
    # The temporary files provide paths for the tools to write their patches
    # to; they are removed again when the 'with' block exits.
    with tempfile.NamedTemporaryFile() as puffdiff_patch, \
         tempfile.NamedTemporaryFile() as bsdiff_patch:

      tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

      operation = 'puffdiff'
      cmd = ['puffin',
             '--operation={}'.format(operation),
             '--src_file={}'.format(src),
             '--dst_file={}'.format(tgt),
             '--patch_file={}'.format(puffdiff_patch.name)]
      # Running the puffdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Puffin failed to do {} command: {}'
                    .format(operation, cmd))

      operation = 'bsdiff'
      cmd = ['bsdiff', '--type', 'bz2', src, tgt, bsdiff_patch.name]
      # Running the bsdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Failed to do {} command: {}'
                    .format(operation, cmd))

      # Sizes are read from disk by path because the patches were written by
      # the external tools, not through the open temporary file objects.
      logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
                    os.path.basename(src),
                    os.stat(src).st_size, os.stat(tgt).st_size,
                    os.stat(bsdiff_patch.name).st_size,
                    os.stat(puffdiff_patch.name).st_size)

  return 0
103
104
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  sys.exit(main(sys.argv))
107