• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2015, Google Inc.
2#
3# Permission to use, copy, modify, and/or distribute this software for any
4# purpose with or without fee is hereby granted, provided that the above
5# copyright notice and this permission notice appear in all copies.
6#
7# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15"""Extracts archives."""
16
17
18import hashlib
19import optparse
20import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
  """
  CheckedJoin normalizes path and returns it joined onto output.

  A ValueError carrying the normalized path is raised when that path is
  absolute or begins with '.' (which covers '..' escapes, the current
  directory itself, and any dot-prefixed name). This is only a sanity check
  that the result stays under output; it shouldn't be relied on for untrusted
  input.
  """
  normalized = os.path.normpath(path)
  is_relative = not os.path.isabs(normalized)
  if is_relative and not normalized.startswith('.'):
    return os.path.join(output, normalized)
  raise ValueError(normalized)
38
39
def IterateZip(path):
  """
  IterateZip is a generator over the zip file at path. For every entry whose
  name does not end in '/' (i.e. everything except directory entries) it
  yields a (filename, mode, fileobj) tuple. mode is always None, since no
  permission bits are reported for zip entries here. The zip file stays open
  while the generator is being consumed and is closed when it finishes or is
  closed.
  """
  with zipfile.ZipFile(path, 'r') as archive:
    members = (m for m in archive.infolist() if not m.filename.endswith('/'))
    for member in members:
      yield (member.filename, None, archive.open(member))
50
51
def IterateTar(path):
  """
  IterateTar is a generator over the gzip-compressed tar file at path. For
  every regular file it yields a (filename, mode, fileobj) tuple, where mode
  is the permission bits recorded in the archive. Directory entries are
  skipped, and any other entry type (symlinks, devices, ...) raises
  ValueError.
  """
  with tarfile.open(path, 'r:gz') as archive:
    for member in archive:
      if member.isfile():
        yield (member.name, member.mode, archive.extractfile(member))
      elif not member.isdir():
        # Neither a regular file nor a directory: refuse to handle it.
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
64
65
def main(args):
  """
  main extracts ARCHIVE (a .zip or .tar.gz file) into the OUTPUT directory.

  args is the command line without the program name: ARCHIVE, OUTPUT, and
  optionally --no-prefix. Unless --no-prefix is given, every entry in the
  archive must live under the same top-level directory, which is stripped
  when extracting.

  A SHA-256 digest of the archive is stored in
  OUTPUT/.boringssl_archive_digest. If the stored digest already matches the
  archive, nothing is done. Otherwise OUTPUT is deleted and re-created from
  the archive.

  Returns 1 on a usage error and 0 otherwise (including when ARCHIVE does not
  exist). Raises ValueError for an unsupported archive extension, an entry
  path containing a backslash or starting with '/', an entry outside the
  common prefix, or a path rejected by CheckedJoin.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  # Hash the raw bytes of the archive. The file must be opened in binary mode:
  # text mode corrupts or truncates the data on Windows and yields str rather
  # than bytes on Python 3.
  sha256 = hashlib.sha256()
  with open(archive, 'rb') as f:
    while True:
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha256.update(chunk)
  digest = sha256.hexdigest()

  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive)
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for path, mode, inp in entries:
      try:
        # Even on Windows, zip files must always use forward slashes.
        if '\\' in path or path.startswith('/'):
          raise ValueError(path)

        if not options.no_prefix:
          new_prefix, sep, rest = path.partition('/')
          if not sep:
            # The entry is not inside any top-level directory, so there is
            # no prefix to strip.
            raise ValueError(path)

          # Ensure the archive is consistent.
          if prefix is None:
            prefix = new_prefix
          if prefix != new_prefix:
            raise ValueError((prefix, new_prefix))
        else:
          rest = path

        # Extract the file into the output directory.
        fixed_path = CheckedJoin(output, rest)
        parent = os.path.dirname(fixed_path)
        if not os.path.isdir(parent):
          os.makedirs(parent)
        with open(fixed_path, 'wb') as out:
          shutil.copyfileobj(inp, out)
      finally:
        # Release the per-entry file object promptly rather than leaking one
        # handle per archive member.
        inp.close()

      # Fix up permissions if needbe.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if mode is not None:
        os.chmod(fixed_path, mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    # Closing the generator also closes the underlying archive file.
    entries.close()

  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
154
155
if __name__ == '__main__':
  # Run the extractor on the command-line arguments (program name dropped)
  # and use main()'s return value as the process exit status.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
158