# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import re
import shutil
import stat
import subprocess
import sys
import tempfile
import time

import py_utils
from py_utils import lock

# Do a no-op import here so that cloud_storage_global_lock dep is picked up
# by https://cs.chromium.org/chromium/src/build/android/test_runner.pydeps.
# TODO(nedn, jbudorick): figure out a way to get rid of this ugly hack.
from py_utils import cloud_storage_global_lock  # pylint: disable=unused-import

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Use an ordered dict so that the bucket aliases are listed from the most open
# to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()
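
# For illustration (not part of the original file): callers that accept a
# bucket alias can resolve it through this mapping, e.g.
#
#   BUCKET_ALIASES['public'] == PUBLIC_BUCKET     # 'chromium-telemetry'
#   BUCKET_ALIASES['output'] == TELEMETRY_OUTPUT  # 'chrome-telemetry-output'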


_GSUTIL_PATH = os.path.join(py_utils.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If the environment variable DISABLE_CLOUD_STORAGE_IO is set to '1', any
# method call that performs cloud storage network I/O will raise an exception.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'
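
# Illustrative example (not part of the original file): exporting the variable
# before invoking a tool that uses this module disables all gsutil network
# commands, e.g.
#
#   DISABLE_CLOUD_STORAGE_IO=1 ./run_tests ...   # hypothetical command line
#
# In that state, any helper below that shells out to gsutil for network I/O
# raises CloudStorageIODisabled; the local-only 'help', 'hash', and 'version'
# subcommands are still allowed.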

# The maximum number of seconds to wait to acquire the pseudo lock for a cloud
# storage file before raising an exception.
LOCK_ACQUISITION_TIMEOUT = 10


class CloudStorageError(Exception):

  @staticmethod
  def _GetConfigInstructions():
    command = _GSUTIL_PATH
    if py_utils.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)


class PermissionError(CloudStorageError):

  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):

  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class CloudStorageIODisabled(CloudStorageError):
  pass


class NotFoundError(CloudStorageError):
  pass


class ServerError(CloudStorageError):
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if py_utils.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _IsRunningOnSwarming():
  return os.environ.get('SWARMING_HEADLESS') is not None

def _RunCommand(args):
  # On a CrOS device telemetry runs as root, so HOME is set to /root/, which
  # is not writable. gsutil would then try to create its download tracker
  # directory under HOME and fail. To avoid this, override HOME with a
  # writable directory when running on a CrOS device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if py_utils.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR
  elif _IsRunningOnSwarming():
    gsutil_env = os.environ.copy()

  # Check the gsutil subcommand before |args| is rewritten below; 'help',
  # 'hash', and 'version' are local-only commands that need no network I/O.
  if args[0] not in ('help', 'hash', 'version') and not IsNetworkIOEnabled():
    raise CloudStorageIODisabled(
        'Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. '
        'Command %s is not allowed to run' % args)

  if os.name == 'nt':
    # On Windows, prepend the Python interpreter, since Python scripts aren't
    # directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do this on POSIX, in case someone is using a shell script to
    # redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    raise GetErrorObjectForCloudStorageStderr(stderr)

  return stdout


def GetErrorObjectForCloudStorageStderr(stderr):
  if (stderr.startswith((
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')) or
      re.match('.*401.*does not have .* access to .*', stderr)):
    return CredentialsError()
  if ('status=403' in stderr or 'status 403' in stderr or
      '403 Forbidden' in stderr or
      re.match('.*403.*does not have .* access to .*', stderr)):
    return PermissionError()
  if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
      'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
    return NotFoundError(stderr)
  if '500 Internal Server Error' in stderr:
    return ServerError(stderr)
  return CloudStorageError(stderr)


def IsNetworkIOEnabled():
  """Returns True if cloud storage network I/O is enabled."""
  disable_cloud_storage_env_val = os.getenv(DISABLE_CLOUD_STORAGE_IO)

  if disable_cloud_storage_env_val and disable_cloud_storage_env_val != '1':
    logger.error(
        'Unsupported value of environment variable '
        'DISABLE_CLOUD_STORAGE_IO. Expected None or \'1\' but got %s.',
        disable_cloud_storage_env_val)

  return disable_cloud_storage_env_val != '1'


def List(bucket):
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logger.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
      bucket_from: The cloud storage bucket where the file is currently located.
      bucket_to: The cloud storage bucket it is being copied to.
      remote_path_from: The file path where the file is located in bucket_from.
      remote_path_to: The file path it is being copied to in bucket_to.

  Copying causes no changes locally or to the source file, and overwrites any
  existing file in the destination location.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logger.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])


def Delete(bucket, remote_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logger.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  with _FileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
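
# Illustrative usage of the helpers above (not part of the original file;
# bucket and paths are hypothetical, and the module is assumed to be imported
# as cloud_storage):
#
#   if cloud_storage.Exists(cloud_storage.PUBLIC_BUCKET, 'data/foo.json'):
#     cloud_storage.Get(cloud_storage.PUBLIC_BUCKET, 'data/foo.json',
#                       '/tmp/foo.json')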


_CLOUD_STORAGE_GLOBAL_LOCK = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'cloud_storage_global_lock.py')


@contextlib.contextmanager
def _FileLock(base_path):
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))

  # Make sure that we guard the creation, acquisition, release, and removal of
  # the pseudo lock all with the same guard (_CLOUD_STORAGE_GLOBAL_LOCK).
  # Otherwise, we can get nasty interleavings that result in multiple processes
  # thinking they have an exclusive lock, like:
  #
  # (Process 1) Create and acquire the pseudo lock
  # (Process 1) Release the pseudo lock
  # (Process 1) Release the file lock
  # (Process 2) Open and acquire the existing pseudo lock
  # (Process 1) Delete the (existing) pseudo lock
  # (Process 3) Create and acquire a new pseudo lock
  #
  # Using the same guard for creation and removal of the pseudo lock guarantees
  # that all processes are referring to the same lock.
  pseudo_lock_fd = None
  pseudo_lock_fd_return = []
  py_utils.WaitFor(lambda: _AttemptPseudoLockAcquisition(pseudo_lock_path,
                                                         pseudo_lock_fd_return),
                   LOCK_ACQUISITION_TIMEOUT)
  pseudo_lock_fd = pseudo_lock_fd_return[0]

  try:
    yield
  finally:
    py_utils.WaitFor(lambda: _AttemptPseudoLockRelease(pseudo_lock_fd),
                     LOCK_ACQUISITION_TIMEOUT)

def _AttemptPseudoLockAcquisition(pseudo_lock_path, pseudo_lock_fd_return):
  """Try to acquire the lock and return a boolean indicating whether the attempt
  was successful. If the attempt was successful, pseudo_lock_fd_return, which
  should be an empty array, will be modified to contain a single entry: the file
  descriptor of the (now acquired) lock file.

  This whole operation is guarded with the global cloud storage lock, which
  prevents race conditions that might otherwise cause multiple processes to
  believe they hold the same pseudo lock (see _FileLock for more details).
  """
  pseudo_lock_fd = None
  try:
    with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
      with lock.FileLock(global_file, lock.LOCK_EX | lock.LOCK_NB):
        # Attempt to acquire the lock in a non-blocking manner. If we block,
        # then we'll cause deadlock because another process will be unable to
        # acquire the cloud storage global lock in order to release the pseudo
        # lock.
        pseudo_lock_fd = open(pseudo_lock_path, 'w')
        lock.AcquireFileLock(pseudo_lock_fd, lock.LOCK_EX | lock.LOCK_NB)
        pseudo_lock_fd_return.append(pseudo_lock_fd)
        return True
  except (lock.LockException, IOError):
    # We failed to acquire either the global cloud storage lock or the pseudo
    # lock.
    if pseudo_lock_fd:
      pseudo_lock_fd.close()
    return False


def _AttemptPseudoLockRelease(pseudo_lock_fd):
  """Try to release the pseudo lock and return a boolean indicating whether
  the release was successful.

  This whole operation is guarded with the global cloud storage lock, which
  prevents race conditions that might otherwise cause multiple processes to
  believe they hold the same pseudo lock (see _FileLock for more details).
  """
  pseudo_lock_path = pseudo_lock_fd.name
  try:
    with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
      with lock.FileLock(global_file, lock.LOCK_EX | lock.LOCK_NB):
        lock.ReleaseFileLock(pseudo_lock_fd)
        pseudo_lock_fd.close()
        try:
          os.remove(pseudo_lock_path)
        except OSError:
          # We don't care if the pseudo lock gets removed elsewhere before
          # we have a chance to do so.
          pass
        return True
  except (lock.LockException, IOError):
    # We failed to acquire the global cloud storage lock and are thus unable to
    # release the pseudo lock.
    return False


def _CreateDirectoryIfNecessary(directory):
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logger.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logger.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Upload the file in |local_path| to cloud storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
        permission.

  Returns:
    The URL the file was uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logger.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
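
# Illustrative usage (not part of the original file; bucket and paths are
# hypothetical, and the module is assumed to be imported as cloud_storage):
#
#   view_url = cloud_storage.Insert(cloud_storage.INTERNAL_BUCKET,
#                                   'traces/trace.html', '/tmp/trace.html',
#                                   publicly_readable=False)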


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
     doesn't exist or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True


def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(file_path):
    hash_path = file_path + '.sha1'
    fetch_ts_path = file_path + '.fetchts'
    if not os.path.exists(hash_path):
      logger.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)

    # To save the time required to compute the binary's hash (an expensive
    # operation, see crbug.com/793609#c2 for details), whenever we fetch a new
    # binary we also record the time of the fetch in |fetch_ts_path|. The next
    # time the file may need updating (i.e. its hash in |hash_path| changes),
    # we only need to compare the modification time of |hash_path| with the
    # timestamp stored in |fetch_ts_path| to figure out whether the update has
    # already been done.
    #
    # Note: for this to work, we assume that only cloud_storage.GetIfChanged
    # modifies the local |file_path| binary.

    if os.path.exists(fetch_ts_path) and os.path.exists(file_path):
      with open(fetch_ts_path) as f:
        data = f.read().strip()
        last_binary_fetch_ts = float(data)

      if last_binary_fetch_ts > os.path.getmtime(hash_path):
        return False

    # Whether the local binary already has a hash matching expected_hash or we
    # need to fetch a new binary from cloud storage, update the timestamp in
    # |fetch_ts_path| with the current time anyway, since it is outdated
    # compared with the .sha1 file's last modified time.
    with open(fetch_ts_path, 'w') as f:
      f.write(str(time.time()))

    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    _GetLocked(bucket, expected_hash, file_path)
    if CalculateHash(file_path) != expected_hash:
      os.remove(fetch_ts_path)
      raise RuntimeError(
          'Binary stored in cloud storage does not have a hash matching the '
          '.sha1 file. Please make sure that the binary file is uploaded '
          'using the depot_tools/upload_to_google_storage.py script or '
          'through the automated framework.')
    return True
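
# Illustrative .sha1 workflow for GetIfChanged above (not part of the original
# file; paths are hypothetical). Given a checkout that contains only
# 'bin/tool.sha1':
#
#   cloud_storage.GetIfChanged('bin/tool', cloud_storage.PUBLIC_BUCKET)
#
# reads the hex digest from 'bin/tool.sha1', downloads
# gs://chromium-telemetry/<digest> to 'bin/tool' if the local copy is missing
# or stale, and records the fetch time in 'bin/tool.fetchts'.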


def GetFilesInDirectoryIfChanged(directory, bucket):
  """Scans the directory for .sha1 files and downloads the corresponding
  binaries from the given bucket in cloud storage if the local and remote
  hashes don't match or there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the SHA-1 hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1 MB chunks, so the whole file doesn't have to be loaded into
      # memory at once.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()
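
# Illustrative use of the hash helpers above (not part of the original file;
# paths are hypothetical):
#
#   expected = ReadHash('bin/tool.sha1')
#   if CalculateHash('bin/tool') != expected:
#     pass  # the local copy is missing or stale and should be re-fetched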
503