# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import shutil
import stat
import subprocess
import re
import sys
import tempfile
import time

import py_utils
from py_utils import lock

# Do a no-op import here so that cloud_storage_global_lock dep is picked up
# by https://cs.chromium.org/chromium/src/build/android/test_runner.pydeps.
# TODO(nedn, jbudorick): figure out a way to get rid of this ugly hack.
from py_utils import cloud_storage_global_lock  # pylint: disable=unused-import

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Use an ordered dict so that the bucket aliases are ordered from the most
# open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


_GSUTIL_PATH = os.path.join(py_utils.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If the environment variable DISABLE_CLOUD_STORAGE_IO is set to '1', any
# method call that would perform cloud storage network IO raises an exception.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'

# The maximum number of seconds to wait to acquire the pseudo lock for a cloud
# storage file before raising an exception.
LOCK_ACQUISITION_TIMEOUT = 10


class CloudStorageError(Exception):

  @staticmethod
  def _GetConfigInstructions():
    command = _GSUTIL_PATH
    if py_utils.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)


class PermissionError(CloudStorageError):

  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):

  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class CloudStorageIODisabled(CloudStorageError):
  pass


class NotFoundError(CloudStorageError):
  pass


class ServerError(CloudStorageError):
  pass

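# Illustrative alias lookup (not used by this module itself): callers that
# accept a bucket alias such as 'public' typically resolve it through
# BUCKET_ALIASES before calling the helpers below, e.g.
#
#   bucket = BUCKET_ALIASES.get('public')  # -> 'chromium-telemetry'
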
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if py_utils.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _IsRunningOnSwarming():
  return os.environ.get('SWARMING_HEADLESS') is not None

def _RunCommand(args):
  # On a cros device telemetry runs as root, so HOME is set to /root/, which is
  # not writable. gsutil would try to create a download tracker dir in the home
  # dir and fail. To avoid this, override HOME with a writable directory when
  # running on a cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if py_utils.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR
  elif _IsRunningOnSwarming():
    gsutil_env = os.environ.copy()

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  if args[0] not in ('help', 'hash', 'version') and not IsNetworkIOEnabled():
    raise CloudStorageIODisabled(
        'Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. '
        'Command %s is not allowed to run' % args)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    raise GetErrorObjectForCloudStorageStderr(stderr)

  return stdout


def GetErrorObjectForCloudStorageStderr(stderr):
  if (stderr.startswith((
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')) or
      re.match('.*401.*does not have .* access to .*', stderr)):
    return CredentialsError()
  if ('status=403' in stderr or 'status 403' in stderr or
      '403 Forbidden' in stderr or
      re.match('.*403.*does not have .* access to .*', stderr)):
    return PermissionError()
  if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
      'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
    return NotFoundError(stderr)
  if '500 Internal Server Error' in stderr:
    return ServerError(stderr)
  return CloudStorageError(stderr)


def IsNetworkIOEnabled():
  """Returns True if cloud storage network IO is enabled."""
  disable_cloud_storage_env_val = os.getenv(DISABLE_CLOUD_STORAGE_IO)

  if disable_cloud_storage_env_val and disable_cloud_storage_env_val != '1':
    logger.error(
        'Unsupported value of environment variable '
        'DISABLE_CLOUD_STORAGE_IO. Expected None or \'1\' but got %s.',
        disable_cloud_storage_env_val)

  return disable_cloud_storage_env_val != '1'

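# Minimal usage sketch (illustrative only; assumes gsutil credentials are
# already configured and the DISABLE_CLOUD_STORAGE_IO environment variable is
# unset):
#
#   from py_utils import cloud_storage
#   if cloud_storage.IsNetworkIOEnabled():
#     names = cloud_storage.List(cloud_storage.PUBLIC_BUCKET)
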
def List(bucket):
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logger.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  The copy makes no changes locally or to the source file, and it overwrites
  any existing file at the destination.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logger.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])


def Delete(bucket, remote_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logger.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  with _FileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)


_CLOUD_STORAGE_GLOBAL_LOCK = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'cloud_storage_global_lock.py')


@contextlib.contextmanager
def _FileLock(base_path):
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))

  # Make sure that we guard the creation, acquisition, release, and removal of
  # the pseudo lock all with the same guard (_CLOUD_STORAGE_GLOBAL_LOCK).
  # Otherwise, we can get nasty interleavings that result in multiple processes
  # thinking they have an exclusive lock, like:
  #
  # (Process 1) Create and acquire the pseudo lock
  # (Process 1) Release the pseudo lock
  # (Process 1) Release the file lock
  # (Process 2) Open and acquire the existing pseudo lock
  # (Process 1) Delete the (existing) pseudo lock
  # (Process 3) Create and acquire a new pseudo lock
  #
  # Using the same guard for creation and removal of the pseudo lock guarantees
  # that all processes are referring to the same lock.
  pseudo_lock_fd = None
  pseudo_lock_fd_return = []
  py_utils.WaitFor(lambda: _AttemptPseudoLockAcquisition(pseudo_lock_path,
                                                         pseudo_lock_fd_return),
                   LOCK_ACQUISITION_TIMEOUT)
  pseudo_lock_fd = pseudo_lock_fd_return[0]

  try:
    yield
  finally:
    py_utils.WaitFor(lambda: _AttemptPseudoLockRelease(pseudo_lock_fd),
                     LOCK_ACQUISITION_TIMEOUT)

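# Illustrative pairing of the two helpers below (this mirrors what _FileLock
# already does; the lock path is hypothetical):
#
#   fd_return = []
#   py_utils.WaitFor(
#       lambda: _AttemptPseudoLockAcquisition('/tmp/foo.pseudo_lock', fd_return),
#       LOCK_ACQUISITION_TIMEOUT)
#   try:
#     ...  # exclusive access to the guarded file
#   finally:
#     py_utils.WaitFor(lambda: _AttemptPseudoLockRelease(fd_return[0]),
#                      LOCK_ACQUISITION_TIMEOUT)
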
def _AttemptPseudoLockAcquisition(pseudo_lock_path, pseudo_lock_fd_return):
  """Try to acquire the lock and return a boolean indicating whether the
  attempt was successful. If the attempt was successful,
  pseudo_lock_fd_return, which should be an empty array, will be modified to
  contain a single entry: the file descriptor of the (now acquired) lock file.

  This whole operation is guarded with the global cloud storage lock, which
  prevents race conditions that might otherwise cause multiple processes to
  believe they hold the same pseudo lock (see _FileLock for more details).
  """
  pseudo_lock_fd = None
  try:
    with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
      with lock.FileLock(global_file, lock.LOCK_EX | lock.LOCK_NB):
        # Attempt to acquire the lock in a non-blocking manner. If we block,
        # then we'll cause deadlock because another process will be unable to
        # acquire the cloud storage global lock in order to release the pseudo
        # lock.
        pseudo_lock_fd = open(pseudo_lock_path, 'w')
        lock.AcquireFileLock(pseudo_lock_fd, lock.LOCK_EX | lock.LOCK_NB)
        pseudo_lock_fd_return.append(pseudo_lock_fd)
        return True
  except (lock.LockException, IOError):
    # We failed to acquire either the global cloud storage lock or the pseudo
    # lock.
    if pseudo_lock_fd:
      pseudo_lock_fd.close()
    return False


def _AttemptPseudoLockRelease(pseudo_lock_fd):
  """Try to release the pseudo lock and return a boolean indicating whether
  the release was successful.

  This whole operation is guarded with the global cloud storage lock, which
  prevents race conditions that might otherwise cause multiple processes to
  believe they hold the same pseudo lock (see _FileLock for more details).
  """
  pseudo_lock_path = pseudo_lock_fd.name
  try:
    with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
      with lock.FileLock(global_file, lock.LOCK_EX | lock.LOCK_NB):
        lock.ReleaseFileLock(pseudo_lock_fd)
        pseudo_lock_fd.close()
        try:
          os.remove(pseudo_lock_path)
        except OSError:
          # We don't care if the pseudo lock gets removed elsewhere before
          # we have a chance to do so.
          pass
        return True
  except (lock.LockException, IOError):
    # We failed to acquire the global cloud storage lock and are thus unable to
    # release the pseudo lock.
    return False


def _CreateDirectoryIfNecessary(directory):
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logger.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logger.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)

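# Minimal download sketch (illustrative only; the object path and local path
# are hypothetical, and the caller needs read access to the bucket):
#
#   from py_utils import cloud_storage
#   cloud_storage.Get(cloud_storage.PUBLIC_BUCKET, 'path/to/object',
#                     '/tmp/object')
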
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Upload the file at |local_path| to cloud storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
      permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logger.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
  doesn't exist or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True


def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(file_path):
    hash_path = file_path + '.sha1'
    fetch_ts_path = file_path + '.fetchts'
    if not os.path.exists(hash_path):
      logger.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)

    # Computing a binary's hash is expensive (see crbug.com/793609#c2 for
    # details). To save that time, any time we fetch a new binary we record the
    # time of the fetch in |fetch_ts_path| alongside the binary itself. Any
    # time the file may need updating (i.e. the hash in |hash_path| changes),
    # we only need to compare the timestamp of |hash_path| with the timestamp
    # stored in |fetch_ts_path| to figure out whether the update has already
    # been done.
    #
    # Note: for this to work, we rely on the assumption that only
    # cloud_storage.GetIfChanged modifies the local |file_path| binary.

    if os.path.exists(fetch_ts_path) and os.path.exists(file_path):
      with open(fetch_ts_path) as f:
        data = f.read().strip()
        last_binary_fetch_ts = float(data)

      if last_binary_fetch_ts > os.path.getmtime(hash_path):
        return False

    # Whether the locally stored binary already has a hash matching
    # expected_hash or we need to fetch a new binary from cloud storage, update
    # the timestamp in |fetch_ts_path| with the current time anyway, since it
    # is outdated compared with the sha1 file's last modified time.
    with open(fetch_ts_path, 'w') as f:
      f.write(str(time.time()))

    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    _GetLocked(bucket, expected_hash, file_path)
    if CalculateHash(file_path) != expected_hash:
      os.remove(fetch_ts_path)
      raise RuntimeError(
          'Binary stored in cloud storage does not have hash matching .sha1 '
          'file. Please make sure that the binary file is uploaded using '
          'depot_tools/upload_to_google_storage.py script or through automatic '
          'framework.')
    return True

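# Sketch of the on-disk layout GetIfChanged expects (file names are
# hypothetical): given data/binary.sha1 containing the SHA-1 of the desired
# binary, the call
#
#   cloud_storage.GetIfChanged('data/binary', cloud_storage.PARTNER_BUCKET)
#
# downloads gs://<bucket>/<sha1> to data/binary only when the local copy is
# missing or stale, and records the fetch time in data/binary.fetchts.
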
def GetFilesInDirectoryIfChanged(directory, bucket):
  """Scan the directory for .sha1 files, and download the corresponding files
  from the given bucket in cloud storage if the local and remote hashes don't
  match or there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()
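
# Illustrative consistency check (not part of this module's API; paths are
# hypothetical): a local binary matches its .sha1 companion when
#
#   CalculateHash('data/binary') == ReadHash('data/binary.sha1')
#
# which is exactly the comparison GetIfChanged performs before deciding to
# re-download.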