• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A utility script for downloading versioned Syzygy binaries."""
7
8import hashlib
9import errno
10import json
11import logging
12import optparse
13import os
14import re
15import shutil
16import stat
17import sys
18import subprocess
19import tempfile
20import time
21import zipfile
22
23
_LOGGER = logging.getLogger(os.path.basename(__file__))

# The relative path where official builds are archived in their GS bucket.
_SYZYGY_ARCHIVE_PATH = '/builds/official/%(revision)s'

# A JSON file containing the state of the download directory. If this file and
# directory state do not agree, then the binaries will be downloaded and
# installed again.
_STATE = '.state'

# This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
# The archive exclusively uses lowercase GIT hashes. The pattern is a raw
# string so that '\d' reaches the regex engine untouched instead of relying on
# Python passing an unrecognized string escape through.
_REVISION_RE = re.compile(r'^(?:\d+|[a-f0-9]{40})$')

# This matches an MD5 hash.
_MD5_RE = re.compile(r'^[a-f0-9]{32}$')

# List of resources to be downloaded and installed. These are tuples with the
# following format:
# (basename, logging name, relative installation path, extraction filter)
# The extraction filter is either None (extract every archive entry) or a
# callable taking a zipfile.ZipInfo and returning true for entries to extract.
_RESOURCES = [
  ('benchmark.zip', 'benchmark', '', None),
  ('binaries.zip', 'binaries', 'exe', None),
  ('symbols.zip', 'symbols', 'exe',
      lambda x: x.filename.endswith('.dll.pdb'))]


# Name of the MS DIA dll that we need to copy to the binaries directory.
_DIA_DLL_NAME = "msdia140.dll"
53
54
def _LoadState(output_dir):
  """Reads the JSON state file stored in |output_dir|.

  Returns the decoded JSON value, or None when the state file is missing or
  does not contain valid JSON.
  """
  state_path = os.path.join(output_dir, _STATE)
  if os.path.exists(state_path):
    with open(state_path, 'rb') as state_file:
      _LOGGER.debug('Reading state file: %s', state_path)
      try:
        return json.load(state_file)
      except ValueError:
        # json.load signals malformed content with ValueError.
        _LOGGER.debug('Invalid state file.')
        return None
  _LOGGER.debug('No state file found.')
  return None
70
71
def _SaveState(output_dir, state, dry_run=False):
  """Serializes |state| as JSON into the state file inside |output_dir|.

  When |dry_run| is true the write is only logged and nothing touches disk.
  """
  state_path = os.path.join(output_dir, _STATE)
  _LOGGER.debug('Writing state file: %s', state_path)
  if not dry_run:
    with open(state_path, 'wb') as state_file:
      # Sorted keys and indentation keep the file stable and human readable.
      serialized = json.dumps(state, sort_keys=True, indent=2)
      state_file.write(serialized)
80
81
def _Md5(path):
  """Returns the hexadecimal MD5 digest of the file at |path|, which must
  exist.

  The file is read in fixed-size chunks inside a `with` block, so the handle is
  closed deterministically and large files are never held in memory whole.
  """
  digest = hashlib.md5()
  with open(path, 'rb') as f:
    # iter() with a sentinel keeps calling f.read() until it returns b'' (EOF).
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
85
86
def _StateIsValid(state):
  """Returns true if |state| has the structure of a state dictionary.

  A valid state is a dict with a string 'revision' matching _REVISION_RE and a
  'contents' dict that maps non-empty string paths to string MD5 digests
  matching _MD5_RE.
  """
  if not isinstance(state, dict):
    _LOGGER.debug('State must be a dict.')
    return False

  revision = state.get('revision')
  if not (isinstance(revision, basestring) and _REVISION_RE.match(revision)):
    _LOGGER.debug('State contains an invalid revision.')
    return False

  contents = state.get('contents')
  if not isinstance(contents, dict):
    _LOGGER.debug('State must contain a contents dict.')
    return False

  for relpath, digest in contents.iteritems():
    path_ok = isinstance(relpath, basestring) and len(relpath) != 0
    if not path_ok:
      _LOGGER.debug('State contents dict contains an invalid path.')
      return False
    digest_ok = isinstance(digest, basestring) and _MD5_RE.match(digest)
    if not digest_ok:
      _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
      return False
  return True
108
109
def _BuildActualState(stored, revision, output_dir):
  """Computes the on-disk state for the paths recorded in |stored|.

  Only paths listed in |stored|['contents'] are examined, so files added
  locally to the directories are ignored. Paths that are not regular files
  under |output_dir| are left out of the result. |stored| must be a valid
  state dictionary. Returns a state dictionary carrying |revision|.
  """
  present = {}
  for relpath in stored['contents']:
    candidate = os.path.abspath(os.path.join(output_dir, relpath))
    if not os.path.isfile(candidate):
      continue
    present[relpath] = _Md5(candidate)
  return { 'revision': revision, 'contents': present }
125
126
def _StatesAreConsistent(stored, actual):
  """Returns true if |actual| agrees with |stored|.

  Both arguments must be valid state dictionaries. The revisions must be equal
  and every path in |stored| must be present in |actual| with the same digest;
  entries that only exist in |actual| are ignored.
  """
  if stored['revision'] != actual['revision']:
    _LOGGER.debug('Mismatched revision number.')
    return False

  expected = stored['contents']
  found = actual['contents']
  for relpath, expected_md5 in expected.iteritems():
    if relpath in found:
      if expected_md5 == found[relpath]:
        continue
      _LOGGER.debug('Modified content: %s', relpath)
    else:
      _LOGGER.debug('Missing content: %s', relpath)
    return False
  return True
144
145
def _GetCurrentState(revision, output_dir):
  """Loads the current state and checks to see if it is consistent.

  Returns a tuple (state, bool). The returned state will always be valid, even
  if an invalid state is present on disk: in that case an empty placeholder
  state is returned. The bool is true only when the stored state is valid, the
  state file is at least as new as this script, and the files on disk match
  the stored digests for |revision|.
  """
  stored = _LoadState(output_dir)
  if not _StateIsValid(stored):
    _LOGGER.debug('State is invalid.')
    # Return a valid but empty state.
    return ({'revision': '0', 'contents': {}}, False)

  # If the script has been modified consider the state invalid. This cheap
  # timestamp comparison runs before any hashing so that no file digests are
  # computed when the answer is already known.
  path = os.path.join(output_dir, _STATE)
  if os.path.getmtime(__file__) > os.path.getmtime(path):
    return (stored, False)

  # Otherwise, explicitly validate the state against the files on disk.
  actual = _BuildActualState(stored, revision, output_dir)
  return (stored, _StatesAreConsistent(stored, actual))
165
166
def _DirIsEmpty(path):
  """Returns true if the given directory is empty, false otherwise.

  Only the top level of |path| is inspected. If |path| cannot be enumerated
  (for example it does not exist) os.walk yields nothing and False is returned
  explicitly, so the result is always a bool.
  """
  for _, dirs, files in os.walk(path):
    # The first tuple produced by os.walk describes |path| itself.
    return not dirs and not files
  return False
171
172
def _RmTreeHandleReadOnly(func, path, exc):
  """An error handling function for use with shutil.rmtree. This will
  detect failures to remove read-only files, and will change their properties
  prior to removing them. This is necessary on Windows as os.remove will return
  an access error for read-only files, and git repos contain read-only
  pack/index files.

  |func| is the function that failed, |path| is the path it was operating on
  and |exc| is the sys.exc_info() tuple describing the failure. Any failure
  that is not an access error from a removal function is re-raised.
  """
  excvalue = exc[1]
  # os.unlink is listed alongside os.remove because the two are distinct
  # function objects and shutil.rmtree may report either one for a failed file
  # removal, depending on the Python version.
  removal_funcs = (os.rmdir, os.remove, os.unlink)
  if func in removal_funcs and excvalue.errno == errno.EACCES:
    _LOGGER.debug('Removing read-only path: %s', path)
    os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    func(path)
  else:
    # This handler runs while rmtree is still handling the exception, so a bare
    # raise re-raises the original error with its traceback.
    raise
187
188
def _RmTree(path):
  """Recursively deletes |path| via shutil.rmtree.

  Errors are not ignored; they are routed to _RmTreeHandleReadOnly so that
  read-only files can be made writable and removed.
  """
  shutil.rmtree(path, False, _RmTreeHandleReadOnly)
192
193
def _CleanState(output_dir, state, dry_run=False):
  """Removes the files listed in |state| from |output_dir|.

  Each path in |state|['contents'] that exists on disk is deleted, provided its
  MD5 still matches the stored digest. Afterwards the directories that held
  those files are removed if they are empty. With |dry_run| set, the same
  checks and logging happen but nothing is deleted.

  Raises an Exception if a listed path is now a directory or if a file has
  local changes. Returns a dictionary keyed by the relative paths that were
  (or, in a dry run, would have been) deleted.
  """
  _LOGGER.debug('Deleting files from previous installation.')
  removed = {}
  stored_md5s = state['contents']

  # Walk the recorded files in sorted order, remembering each parent directory
  # so that it can be considered for removal afterwards.
  parent_dirs = {}
  for relpath in sorted(stored_md5s.keys()):
    target = os.path.join(output_dir, relpath)
    parent_dirs[os.path.dirname(target)] = True
    if not os.path.exists(target):
      continue

    # If somehow the file has become a directory complain about it.
    if os.path.isdir(target):
      raise Exception('Directory exists where file expected: %s' % target)

    # Refuse to delete a file whose contents no longer match the stored hash.
    if _Md5(target) != stored_md5s[relpath]:
      raise Exception('File has local changes: %s' % target)

    # The file is unchanged so it can safely be deleted.
    _LOGGER.debug('Deleting file "%s".', target)
    removed[relpath] = True
    if not dry_run:
      os.unlink(target)

  # Longest names first, so the most deeply nested directories are handled
  # before their parents.
  for dirpath in sorted(parent_dirs.keys(), key=len, reverse=True):
    if not os.path.exists(dirpath):
      continue
    if not _DirIsEmpty(dirpath):
      continue
    _LOGGER.debug('Deleting empty directory "%s".', dirpath)
    if not dry_run:
      _RmTree(dirpath)

  return removed
241
242
def _FindGsUtil():
  """Searches the PATH environment variable for depot_tools.

  A PATH entry is accepted when it contains both git_cl.py and gsutil.py.
  Returns the absolute path to gsutil.py in the first such entry, or None if
  no entry qualifies.
  """
  for entry in os.environ['PATH'].split(os.pathsep):
    directory = os.path.abspath(entry)
    marker = os.path.join(directory, 'git_cl.py')
    candidate = os.path.join(directory, 'gsutil.py')
    if not os.path.exists(marker):
      continue
    if os.path.exists(candidate):
      return candidate
  return None
252
253
def _GsUtil(*cmd):
  """Runs the given command in gsutil with exponential backoff and retries.

  |cmd| holds the arguments passed to gsutil.py, which is run with the current
  Python interpreter. A failing command is retried up to three times, sleeping
  4, 8 and then 16 seconds between attempts.

  Raises RuntimeError if gsutil.py cannot be located, or if the command still
  fails once the retries are exhausted.
  """
  gs_util = _FindGsUtil()
  if gs_util is None:
    # Fail with a clear message rather than handing None to subprocess.
    raise RuntimeError('Unable to find gsutil.py; is depot_tools in the PATH?')
  full_cmd = [sys.executable, gs_util] + list(cmd)

  retries = 3
  timeout = 4  # Seconds.
  while True:
    _LOGGER.debug('Running %s', full_cmd)
    returncode = subprocess.call(full_cmd, shell=False)

    # Stop retrying on success.
    if returncode == 0:
      return

    # Raise a permanent failure if retries have been exhausted.
    if retries == 0:
      raise RuntimeError('Command "%s" returned %d.' % (full_cmd, returncode))

    _LOGGER.debug('Sleeping %d seconds and trying again.', timeout)
    time.sleep(timeout)
    retries -= 1
    timeout *= 2
278
279
def _Download(resource):
  """Downloads the given GS resource to a temporary file, returning its path.

  |resource| is appended to the 'gs://syzygy-archive' bucket URL. The caller
  owns the returned file and is responsible for deleting it. If the download
  fails the temporary file is removed before the error propagates.
  """
  fd, local_path = tempfile.mkstemp(suffix='syzygy_archive')
  os.close(fd)
  url = 'gs://syzygy-archive' + resource

  succeeded = False
  try:
    gsutil_dest = local_path
    if sys.platform == 'cygwin':
      # Change temporary path to Windows path for gsutil.
      gsutil_dest = subprocess.check_output(
          ['cygpath', '-w', local_path]).strip()
    _GsUtil('cp', url, gsutil_dest)
    succeeded = True
  finally:
    # Don't leave an orphaned temporary file behind when the download fails.
    if not succeeded and os.path.exists(local_path):
      os.remove(local_path)
  return local_path
293
294
def _MaybeCopyDIABinaries(options, contents):
  """Try to copy the DIA DLL to the binaries exe directory.

  The toolchain location is read from the 'path' entry of win_toolchain.json,
  which is looked up next to this script. The copy is skipped (with a log
  message) if that file is missing, if its 'path' is not a directory, or if
  the DLL is not found under 'DIA SDK/bin'. On a successful copy the MD5 of
  the copied DLL is recorded in |contents| under its path relative to
  |options.output_dir|. Nothing is copied or recorded when |options.dry_run|
  is set.
  """
  toolchain_data_file = os.path.join(os.path.dirname(__file__),
                                     'win_toolchain.json')
  if not os.path.exists(toolchain_data_file):
    _LOGGER.debug('Toolchain JSON data file doesn\'t exist, skipping.')
    return
  with open(toolchain_data_file) as temp_f:
    toolchain_data = json.load(temp_f)
  # A missing 'path' entry is reported like any other invalid toolchain file
  # instead of raising KeyError.
  toolchain_path = toolchain_data.get('path')
  if not toolchain_path or not os.path.isdir(toolchain_path):
    _LOGGER.error('The toolchain JSON file is invalid.')
    return
  dia_sdk_binaries_dir = os.path.join(toolchain_path, 'DIA SDK', 'bin')
  dia_dll = os.path.join(dia_sdk_binaries_dir, _DIA_DLL_NAME)
  if not os.path.exists(dia_dll):
    _LOGGER.debug('%s is missing, skipping.', dia_dll)
    return
  dia_dll_dest = os.path.join(options.output_dir, 'exe', _DIA_DLL_NAME)
  _LOGGER.debug('Copying %s to %s.', dia_dll, dia_dll_dest)
  if not options.dry_run:
    shutil.copy(dia_dll, dia_dll_dest)
    contents[os.path.relpath(dia_dll_dest, options.output_dir)] = (
        _Md5(dia_dll_dest))
318
319
def _InstallBinaries(options, deleted=None):
  """Installs Syzygy binaries. This assumes that the output directory has
  already been cleaned, as it will refuse to overwrite existing files.

  The archives to install are taken from |options.resources| if given,
  otherwise from _RESOURCES. Each archive is downloaded to a temporary file,
  the entries accepted by its filter are extracted, and the temporary file is
  removed again even if extraction fails.

  |deleted| is a container of relative paths that a preceding cleanup removed;
  it is only consulted in a dry run, where those files still exist on disk and
  must not be reported as conflicts. In a dry run the archives are still
  downloaded but nothing is extracted.

  Raises an Exception if a file exists where a directory is needed, or if an
  archive entry would overwrite an existing path. Returns a state dictionary
  holding the revision and the MD5 of every extracted file.
  """
  if deleted is None:
    deleted = {}
  contents = {}
  state = { 'revision': options.revision, 'contents': contents }
  archive_path = _SYZYGY_ARCHIVE_PATH % { 'revision': options.revision }
  if options.resources:
    resources = [(resource, resource, '', None)
                 for resource in options.resources]
  else:
    resources = _RESOURCES
  for (base, name, subdir, filt) in resources:
    # Create the output directory if it doesn't exist.
    fulldir = os.path.join(options.output_dir, subdir)
    if os.path.isfile(fulldir):
      raise Exception('File exists where a directory needs to be created: %s' %
                      fulldir)
    if not os.path.exists(fulldir):
      _LOGGER.debug('Creating directory: %s', fulldir)
      if not options.dry_run:
        os.makedirs(fulldir)

    # Download and read the archive.
    resource = archive_path + '/' + base
    _LOGGER.debug('Retrieving %s archive at "%s".', name, resource)
    path = _Download(resource)

    try:
      _LOGGER.debug('Unzipping %s archive.', name)
      with open(path, 'rb') as data:
        archive = zipfile.ZipFile(data)
        for entry in archive.infolist():
          if filt and not filt(entry):
            continue
          fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
          relpath = os.path.relpath(fullpath, options.output_dir)
          if os.path.exists(fullpath):
            # If in a dry-run take into account the fact that the file *would*
            # have been deleted.
            if not (options.dry_run and relpath in deleted):
              raise Exception('Path already exists: %s' % fullpath)

          # Extract the file and update the state dictionary.
          _LOGGER.debug('Extracting "%s".', fullpath)
          if not options.dry_run:
            archive.extract(entry.filename, fulldir)
            md5 = _Md5(fullpath)
            contents[relpath] = md5
            if sys.platform == 'cygwin':
              os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)
    finally:
      # Always discard the downloaded archive, even when extraction fails.
      _LOGGER.debug('Removing temporary file "%s".', path)
      os.remove(path)

  if options.copy_dia_binaries:
    # Try to copy the DIA binaries to the binaries directory.
    _MaybeCopyDIABinaries(options, contents)

  return state
379
380
def _ParseCommandLine():
  """Parses the command-line and returns an options structure.

  Besides parsing, this configures logging at the requested level, reads the
  revision from --revision-file when that option is used, validates the
  revision against _REVISION_RE and normalizes the output directory path.
  Invalid command lines are reported through OptionParser.error.
  """
  option_parser = optparse.OptionParser()
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If true then will simply list actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='Force an installation even if the binaries are up to date.')
  option_parser.add_option('--no-cleanup', action='store_true', default=False,
      help='Allow installation on non-Windows platforms, and skip the forced '
           'cleanup step.')
  option_parser.add_option('--output-dir', type='string',
      help='The path where the binaries will be replaced. Existing binaries '
           'will only be overwritten if not up to date.')
  option_parser.add_option('--overwrite', action='store_true', default=False,
      help='If specified then the installation will happily delete and rewrite '
           'the entire output directory, blasting any local changes.')
  option_parser.add_option('--revision', type='string',
      help='The SVN revision or GIT hash associated with the required version.')
  option_parser.add_option('--revision-file', type='string',
      help='A text file containing an SVN revision or GIT hash.')
  option_parser.add_option('--resource', type='string', action='append',
      dest='resources', help='A resource to be downloaded.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')
  option_parser.add_option('--copy-dia-binaries', action='store_true',
      default=False, help='If true then the DIA dll will get copied into the '
                          'binaries directory if it\'s available.')
  options, args = option_parser.parse_args()
  if args:
    option_parser.error('Unexpected arguments: %s' % args)
  if not options.output_dir:
    option_parser.error('Must specify --output-dir.')
  if not options.revision and not options.revision_file:
    option_parser.error('Must specify one of --revision or --revision-file.')
  if options.revision and options.revision_file:
    option_parser.error('Must not specify both --revision and --revision-file.')

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # If a revision file has been specified then read it. The file is opened in
  # a with block so that the handle is closed as soon as it has been read.
  if options.revision_file:
    with open(options.revision_file, 'rb') as revision_file:
      options.revision = revision_file.read().strip()
    _LOGGER.debug('Parsed revision "%s" from file "%s".',
                 options.revision, options.revision_file)

  # Ensure that the specified SVN revision or GIT hash is valid.
  if not _REVISION_RE.match(options.revision):
    option_parser.error('Must specify a valid SVN or GIT revision.')

  # This just makes output prettier to read.
  options.output_dir = os.path.normpath(options.output_dir)

  return options
439
440
def _RemoveOrphanedFiles(options):
  """This is run on non-Windows systems to remove orphaned files that may have
  been downloaded by a previous version of this script.

  The 'third_party/syzygy' directory (resolved relative to the parent of this
  script's directory) is deleted, but only when |options.output_dir| is its
  'binaries' subdirectory. Removal is best-effort: failures are logged and do
  not raise. Nothing is deleted when |options.dry_run| is set.
  """
  # Reconfigure logging to output info messages. This will allow inspection of
  # cleanup status on non-Windows buildbots.
  _LOGGER.setLevel(logging.INFO)

  output_dir = os.path.abspath(options.output_dir)

  # We only want to clean up the folder in 'src/third_party/syzygy', and we
  # expect to be called with that as an output directory. This is an attempt to
  # not start deleting random things if the script is run from an alternate
  # location, or not called from the gclient hooks.
  expected_syzygy_dir = os.path.abspath(os.path.join(
      os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
  expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
  if expected_output_dir != output_dir:
    _LOGGER.info('Unexpected output directory, skipping cleanup.')
    return

  if not os.path.isdir(expected_syzygy_dir):
    _LOGGER.info('Output directory does not exist, skipping cleanup.')
    return

  def OnError(function, path, excinfo):
    """Logs error encountered by shutil.rmtree."""
    _LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)

  _LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
  if not options.dry_run:
    # ignore_errors must be False for shutil.rmtree to call the handler; with
    # ignore_errors=True the handler is bypassed and failures go unreported.
    # OnError only logs, so the removal still continues past any failure.
    shutil.rmtree(expected_syzygy_dir, ignore_errors=False, onerror=OnError)
473
474
def main():
  """Script entry point: brings the output directory up to the requested
  revision.

  On non-Windows platforms the script only removes orphaned files (unless
  --no-cleanup was given). Otherwise the stored state is compared with the
  files on disk and, if they are inconsistent or --force was given, the old
  installation is removed, the new binaries are installed and the new state is
  saved.
  """
  options = _ParseCommandLine()

  if options.dry_run:
    _LOGGER.debug('Performing a dry-run.')

  # We only care about Windows platforms, as the Syzygy binaries aren't used
  # elsewhere. However, there was a short period of time where this script
  # wasn't gated on OS types, and those OSes downloaded and installed binaries.
  # This will cleanup orphaned files on those operating systems.
  if sys.platform not in ('win32', 'cygwin'):
    if options.no_cleanup:
      _LOGGER.debug('Skipping usual cleanup for non-Windows platforms.')
    else:
      return _RemoveOrphanedFiles(options)

  # Load the current installation state, and validate it against the
  # requested installation.
  state, is_consistent = _GetCurrentState(options.revision, options.output_dir)

  # Decide whether or not an install is necessary.
  if options.force:
    _LOGGER.debug('Forcing reinstall of binaries.')
  elif is_consistent:
    # Avoid doing any work if the contents of the directory are consistent.
    _LOGGER.debug('State unchanged, no reinstall necessary.')
    return

  # Under normal logging this is the only message that will be reported.
  _LOGGER.info('Installing revision %s Syzygy binaries.',
               options.revision[0:12])

  # Clean up the old state to begin with. |deleted| is keyed by the relative
  # paths that were (or, in a dry run, would have been) removed.
  deleted = {}
  if options.overwrite:
    if os.path.exists(options.output_dir):
      # If overwrite was specified then take a heavy-handed approach.
      _LOGGER.debug('Deleting entire installation directory.')
      if options.dry_run:
        # Nothing is removed in a dry run, so record every existing file as
        # deleted; otherwise the install step would report files that a real
        # run would already have removed as conflicts.
        for root, _, filenames in os.walk(options.output_dir):
          for filename in filenames:
            relpath = os.path.relpath(os.path.join(root, filename),
                                      options.output_dir)
            deleted[relpath] = True
      else:
        _RmTree(options.output_dir)
  else:
    # Otherwise only delete things that the previous installation put in place,
    # and take care to preserve any local changes.
    deleted = _CleanState(options.output_dir, state, options.dry_run)

  # Install the new binaries. In a dry-run this will actually download the
  # archives, but it won't write anything to disk.
  state = _InstallBinaries(options, deleted)

  # Build and save the state for the directory.
  _SaveState(options.output_dir, state, options.dry_run)


if __name__ == '__main__':
  main()
530