#!/usr/bin/env python3
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""A tool for running diffing tools and measuring patch sizes."""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
    """Puffin general processing error."""


def ParseArguments(argv):
    """Parses and validates command line arguments.

    Args:
        argv: command line arguments to parse (without the program name).

    Returns:
        The argparse.Namespace holding src_corpus, tgt_corpus and debug.

    Raises:
        Error: if either corpus directory is missing from the command line
            or is not an existing directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--src-corpus', metavar='DIR',
                        help='The source corpus directory with compressed '
                             'files.')
    parser.add_argument('--tgt-corpus', metavar='DIR',
                        help='The target corpus directory with compressed '
                             'files.')
    parser.add_argument('--debug', action='store_true',
                        help='Turns on verbosity.')

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Both corpora are mandatory; an omitted flag shows up here as None.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error('Corpus directory {} is non-existent or inaccessible'
                        .format(corpus))
    return args


def _ListCorpusFiles(corpus):
    """Returns the sorted paths of the regular files directly in |corpus|."""
    # os.listdir() order is arbitrary; sort so runs are reproducible.
    paths = (os.path.join(corpus, name) for name in sorted(os.listdir(corpus)))
    return [path for path in paths if os.path.isfile(path)]


def main(argv):
    """Runs puffdiff and bsdiff over each corpus file pair and logs sizes.

    Args:
        argv: the full command line, including the program name at index 0.

    Returns:
        0 on success.

    Raises:
        Error: if the arguments are invalid, if a source file has no
            same-named file in the target corpus, or if a diffing command
            exits with a non-zero status.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Construct list of appropriate files.
    src_files = _ListCorpusFiles(args.src_corpus)
    tgt_files = _ListCorpusFiles(args.tgt_corpus)

    # Check if all files in src_files have a target file in tgt_files.
    files_mismatch = (set(map(os.path.basename, src_files)) -
                      set(map(os.path.basename, tgt_files)))
    if files_mismatch:
        # Sorted so the message does not depend on set iteration order.
        raise Error('Target files {} do not exist in corpus: {}'
                    .format(sorted(files_mismatch), args.tgt_corpus))

    for src in src_files:
        # The temporary files only provide paths for the tools to write to;
        # they are removed when the with-block exits.
        with tempfile.NamedTemporaryFile() as puffdiff_patch, \
                tempfile.NamedTemporaryFile() as bsdiff_patch:

            tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

            operation = 'puffdiff'
            cmd = ['puffin',
                   '--operation={}'.format(operation),
                   '--src_file={}'.format(src),
                   '--dst_file={}'.format(tgt),
                   '--patch_file={}'.format(puffdiff_patch.name)]
            # Running the puffdiff operation
            if subprocess.call(cmd) != 0:
                raise Error('Puffin failed to do {} command: {}'
                            .format(operation, cmd))

            operation = 'bsdiff'
            cmd = ['bsdiff', '--type', 'bz2', src, tgt, bsdiff_patch.name]
            # Running the bsdiff operation
            if subprocess.call(cmd) != 0:
                raise Error('Failed to do {} command: {}'
                            .format(operation, cmd))

            # Sizes are read by path because the tools wrote the patches
            # through the file names, not through our open handles.
            logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
                          os.path.basename(src),
                          os.stat(src).st_size, os.stat(tgt).st_size,
                          os.stat(bsdiff_patch.name).st_size,
                          os.stat(puffdiff_patch.name).st_size)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))