# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""Extracts archives."""


import hashlib
import optparse
import os
import os.path
import tarfile
import shutil
import sys
import zipfile


def CheckedJoin(output, path):
    """
    CheckedJoin returns os.path.join(output, path). It does sanity checks to
    ensure the resulting path is under output, but shouldn't be used on
    untrusted input.

    Raises ValueError(path) if the normalized path is absolute or begins with
    '.'. Note that the latter rejects '..' escapes but also any path whose
    first component merely starts with a dot.
    """
    path = os.path.normpath(path)
    if os.path.isabs(path) or path.startswith('.'):
        raise ValueError(path)
    return os.path.join(output, path)


def IterateZip(path):
    """
    IterateZip opens the zip file at path and returns a generator of
    (filename, mode, fileobj) tuples for each file in it.

    Directory entries (names ending in '/') are skipped. mode is always None
    for zip entries. The zip file stays open until the generator is exhausted
    or closed; the caller is responsible for closing each fileobj.
    """
    with zipfile.ZipFile(path, 'r') as zip_file:
        for info in zip_file.infolist():
            if info.filename.endswith('/'):
                continue
            yield (info.filename, None, zip_file.open(info))


def IterateTar(path):
    """
    IterateTar opens the tar.gz file at path and returns a generator of
    (filename, mode, fileobj) tuples for each file in it.

    Directory entries are skipped. Any entry that is neither a directory nor
    a regular file (e.g. a symlink) raises ValueError. The tar file stays open
    until the generator is exhausted or closed; the caller is responsible for
    closing each fileobj.
    """
    with tarfile.open(path, 'r:gz') as tar_file:
        for info in tar_file:
            if info.isdir():
                continue
            if not info.isfile():
                raise ValueError('Unknown entry type "%s"' % (info.name, ))
            yield (info.name, info.mode, tar_file.extractfile(info))


def _HashFile(path):
    """
    _HashFile returns the hex-encoded SHA-256 digest of the file at path.
    """
    sha256 = hashlib.sha256()
    # Archives are binary data. Text mode would translate newlines on Windows
    # (yielding a wrong digest) and hands hashlib str instead of bytes on
    # Python 3.
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            sha256.update(chunk)
    return sha256.hexdigest()


def main(args):
    """
    main extracts ARCHIVE (a .zip or .tar.gz file) into the OUTPUT directory.

    args is the command line without the program name: [--no-prefix] ARCHIVE
    OUTPUT. Unless --no-prefix is given, every entry must live under the same
    single top-level directory, which is stripped during extraction.

    The SHA-256 digest of the archive is recorded in a stamp file inside
    OUTPUT. If the stamp already matches, nothing is done. Otherwise any
    existing OUTPUT directory is deleted and recreated from the archive.

    Returns 1 on a usage error and 0 otherwise (including when ARCHIVE does
    not exist). Raises ValueError if the archive has an unsupported extension,
    contains an unsafe or malformed path, or has an inconsistent prefix.
    """
    parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
    parser.add_option(
        '--no-prefix', dest='no_prefix', action='store_true',
        help='Do not remove a prefix from paths in the archive.')
    options, args = parser.parse_args(args)

    if len(args) != 2:
        parser.print_help()
        return 1

    archive, output = args

    if not os.path.exists(archive):
        # Skip archives that weren't downloaded.
        return 0

    digest = _HashFile(archive)

    stamp_path = os.path.join(output, ".boringssl_archive_digest")
    if os.path.exists(stamp_path):
        with open(stamp_path) as f:
            if f.read().strip() == digest:
                print("Already up-to-date.")
                return 0

    if archive.endswith('.zip'):
        entries = IterateZip(archive)
    elif archive.endswith('.tar.gz'):
        entries = IterateTar(archive)
    else:
        raise ValueError(archive)

    try:
        if os.path.exists(output):
            print("Removing %s" % (output, ))
            shutil.rmtree(output)

        print("Extracting %s to %s" % (archive, output))
        prefix = None
        num_extracted = 0
        for path, mode, inp in entries:
            # Always release the per-entry file object, even when the entry
            # is rejected, so archive handles do not pile up.
            try:
                # Even on Windows, zip files must always use forward slashes.
                if '\\' in path or path.startswith('/'):
                    raise ValueError(path)

                if not options.no_prefix:
                    # A top-level file has no prefix directory to strip.
                    if '/' not in path:
                        raise ValueError(path)
                    new_prefix, rest = path.split('/', 1)

                    # Ensure the archive is consistent.
                    if prefix is None:
                        prefix = new_prefix
                    if prefix != new_prefix:
                        raise ValueError((prefix, new_prefix))
                else:
                    rest = path

                # Extract the file into the output directory.
                fixed_path = CheckedJoin(output, rest)
                parent = os.path.dirname(fixed_path)
                if not os.path.isdir(parent):
                    os.makedirs(parent)
                with open(fixed_path, 'wb') as out:
                    shutil.copyfileobj(inp, out)
            finally:
                inp.close()

            # Fix up permissions if need be.
            # TODO(davidben): To be extra tidy, this should only track the
            # execute bit as in git.
            if mode is not None:
                os.chmod(fixed_path, mode)

            # Print every 100 files, so bots do not time out on large
            # archives.
            num_extracted += 1
            if num_extracted % 100 == 0:
                print("Extracted %d files..." % (num_extracted,))
    finally:
        entries.close()

    # An archive without any file entries never creates the output directory
    # above; make sure it exists so the stamp can be written.
    if not os.path.isdir(output):
        os.makedirs(output)

    # Written last, so a failed extraction is retried on the next run.
    with open(stamp_path, 'w') as f:
        f.write(digest)

    print("Done. Extracted %d files." % (num_extracted,))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))