# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""

from __future__ import absolute_import

import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import threading
import time
import xml
from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions

import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix

from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin  # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateNotFoundExceptionForObjectWrite
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS

TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)

# pylint: disable=global-at-module-level
global boto_auth_initialized, boto_auth_initialized_lock
# If multiprocessing is available, these will be overridden to process-safe
# variables in InitializeMultiprocessingVariables.
boto_auth_initialized_lock = threading.Lock()
boto_auth_initialized = False

NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')


def InitializeMultiprocessingVariables():  # pylint: disable=invalid-name
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  # pylint: disable=global-variable-undefined
  global boto_auth_initialized, boto_auth_initialized_lock
  boto_auth_initialized_lock = gslib.util.CreateLock()
  boto_auth_initialized = multiprocessing.Value('i', 0)
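

# A minimal standalone sketch (illustrative only, not used elsewhere in this
# module) of the initialize-once pattern used for boto auth above: a plain
# bool guarded by a threading.Lock suffices within one process, while a
# multiprocessing.Value('i', 0) is needed so that forked worker processes all
# observe the flag once it is set.
def _ExampleInitOnce(init_fn, flag, lock):
  """Runs init_fn at most once across the threads/processes sharing the flag.

  Args:
    init_fn: Zero-argument initialization function (hypothetical).
    flag: multiprocessing.Value('i', 0) shared by all workers.
    lock: threading.Lock() or multiprocessing.Lock() guarding the flag.
  """
  with lock:
    if not flag.value:
      init_fn()
      flag.value = 1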
142 """ 143 if self._callback: 144 self._callback(self._start_byte + bytes_downloaded, total_size) 145 146 147class BotoTranslation(CloudApi): 148 """Boto-based XML translation implementation of gsutil Cloud API. 149 150 This class takes gsutil Cloud API objects, translates them to XML service 151 calls, and translates the results back into gsutil Cloud API objects for 152 use by the caller. 153 """ 154 155 def __init__(self, bucket_storage_uri_class, logger, provider=None, 156 credentials=None, debug=0, trace_token=None): 157 """Performs necessary setup for interacting with the cloud storage provider. 158 159 Args: 160 bucket_storage_uri_class: boto storage_uri class, used by APIs that 161 provide boto translation or mocking. 162 logger: logging.logger for outputting log messages. 163 provider: Provider prefix describing cloud storage provider to connect to. 164 'gs' and 's3' are supported. Function implementations ignore 165 the provider argument and use this one instead. 166 credentials: Unused. 167 debug: Debug level for the API implementation (0..3). 168 trace_token: Unused in this subclass. 169 """ 170 super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger, 171 provider=provider, debug=debug) 172 _ = credentials 173 # pylint: disable=global-variable-undefined, global-variable-not-assigned 174 global boto_auth_initialized, boto_auth_initialized_lock 175 with boto_auth_initialized_lock: 176 ConfigureNoOpAuthIfNeeded() 177 if isinstance(boto_auth_initialized, bool): 178 boto_auth_initialized = True 179 else: 180 boto_auth_initialized.value = 1 181 self.api_version = boto.config.get_value( 182 'GSUtil', 'default_api_version', '1') 183 184 def GetBucket(self, bucket_name, provider=None, fields=None): 185 """See CloudApi class for function doc strings.""" 186 _ = provider 187 bucket_uri = self._StorageUriForBucket(bucket_name) 188 headers = {} 189 self._AddApiVersionToHeaders(headers) 190 try: 191 return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True, 192 headers=headers), 193 fields=fields) 194 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 195 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name) 196 197 def ListBuckets(self, project_id=None, provider=None, fields=None): 198 """See CloudApi class for function doc strings.""" 199 _ = provider 200 get_fields = self._ListToGetFields(list_fields=fields) 201 headers = {} 202 self._AddApiVersionToHeaders(headers) 203 if self.provider == 'gs': 204 headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id) 205 try: 206 provider_uri = boto.storage_uri( 207 '%s://' % self.provider, 208 suppress_consec_slashes=False, 209 bucket_storage_uri_class=self.bucket_storage_uri_class, 210 debug=self.debug) 211 212 buckets_iter = provider_uri.get_all_buckets(headers=headers) 213 for bucket in buckets_iter: 214 if self.provider == 's3' and bucket.name.lower() != bucket.name: 215 # S3 listings can return buckets with upper-case names, but boto 216 # can't successfully call them. 


class BotoTranslation(CloudApi):
  """Boto-based XML translation implementation of gsutil Cloud API.

  This class takes gsutil Cloud API objects, translates them to XML service
  calls, and translates the results back into gsutil Cloud API objects for
  use by the caller.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0, trace_token=None):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
          provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Provider prefix describing cloud storage provider to connect
          to. 'gs' and 's3' are supported. Function implementations ignore
          the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
      trace_token: Unused in this subclass.
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    # pylint: disable=global-variable-undefined, global-variable-not-assigned
    global boto_auth_initialized, boto_auth_initialized_lock
    with boto_auth_initialized_lock:
      ConfigureNoOpAuthIfNeeded()
      if isinstance(boto_auth_initialized, bool):
        boto_auth_initialized = True
      else:
        boto_auth_initialized.value = 1
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
                                                            headers=headers),
                                      fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
    try:
      provider_uri = boto.storage_uri(
          '%s://' % self.provider,
          suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)

      buckets_iter = provider_uri.get_all_buckets(headers=headers)
      for bucket in buckets_iter:
        if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # can't successfully call them.
          continue
        yield self._BotoBucketToBucket(bucket, fields=get_fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if canned_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_acl)
        bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
      if canned_def_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_def_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
        bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
      if metadata.cors:
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty. Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
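
  # Comment-only note on the CORS sentinel handled above: REMOVE_CORS_CONFIG
  # marks a caller's request to clear the configuration; translating it to an
  # empty list makes the boto call write an empty CORS document, which
  # removes any existing CORS entries from the bucket.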

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    return self.GetBucket(bucket_name, fields=fields)

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    yield_prefixes = fields is None or 'prefixes' in fields
    yield_objects = fields is None or any(
        field.startswith('items/') for field in fields)
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    try:
      for key in objects_iter:
        if yield_prefixes and isinstance(key, Prefix):
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        elif yield_objects:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by
          # bucket listing, we'll need to make a separate HTTP call for each
          # object to get its metadata and populate the remaining fields with
          # the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    try:
      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
                                                    generation=generation),
                                   fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _CurryDigester(self, digester_object):
    """Curries a digester object into a form consumable by boto.

    Key instantiates its own digesters by calling hash_algs[alg]() [note that
    there are no arguments to this function]. So in order to pass in our
    caught-up digesters during a resumable download, we need to pass the
    digester object itself, but we don't get to look it up based on the
    algorithm name. Here we use a lambda to make the lookup implicit.

    Args:
      digester_object: Input object to be returned by the created function.

    Returns:
      A function which, when called, will return the input object.
    """
    return lambda: digester_object
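
  # Comment-only example (illustrative) of the currying above: given a
  # digester that has already been fed the bytes downloaded so far,
  #
  #   digester = hashlib.md5()
  #   digester.update(already_downloaded_bytes)
  #   hash_algs['md5'] = self._CurryDigester(digester)
  #
  # boto later calls hash_algs['md5']() expecting to construct a fresh
  # digester and instead receives this caught-up instance.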

  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # This implementation will get the object metadata first if we don't pass
    # it in via serialization_data.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
    if end_byte is not None:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % start_byte
    elif start_byte < 0:
      headers['range'] = 'bytes=%s' % start_byte

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key. This is coupled
    # with the implementation in _BotoKeyToObject.
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)

    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

    if total_size:
      num_progress_callbacks = max(int(total_size) / TWO_MIB,
                                   XML_PROGRESS_CALLBACKS)
    else:
      num_progress_callbacks = XML_PROGRESS_CALLBACKS

    try:
      if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
        self._PerformResumableDownload(
            download_stream, start_byte, end_byte, key,
            headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
        self._PerformSimpleDownload(
            download_stream, key, progress_callback=progress_callback,
            num_progress_callbacks=num_progress_callbacks, headers=headers,
            hash_algs=hash_algs)
      else:
        raise ArgumentException('Unsupported DownloadStrategy: %s' %
                                download_strategy)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes. To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case, or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])
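
  # Comment-only sketch of the serialization round trip used above: when
  # _BotoKeyToObject populates an object's mediaLink, it stores
  #
  #   binascii.b2a_base64(pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
  #
  # so that GetObjectMedia can later recover the same boto key, without an
  # extra metadata HTTP call, via
  #
  #   pickle.loads(binascii.a2b_base64(serialization_dict['url']))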

  def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
                             num_progress_callbacks=XML_PROGRESS_CALLBACKS,
                             headers=None, hash_algs=None):
    if not headers:
      headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers,
                               hash_algs=hash_algs)
    except TypeError:  # s3 and mocks do not support hash_algs
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers)

  def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
                                headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded.
      start_byte: Start byte of the download.
      end_byte: End byte of the download.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
      callback: (optional) a callback function that will be called to report
          progress on the download. The callback should accept two integer
          parameters. The first integer represents the number of bytes that
          have been successfully transmitted from the service. The second
          represents the total number of bytes that need to be transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
          parameter, this determines the granularity of the callback by
          defining the maximum number of times the callback will be called
          during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
    self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0
    last_progress_byte = start_byte

    while True:  # Retry as long as we're making progress.
      try:
        cb_handler = DownloadProxyCallbackHandler(start_byte, callback)
        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (start_byte, end_byte)

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        try:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0, hash_algs=hash_algs)
        except TypeError:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
      except retryable_exceptions, e:
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # A broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close and reopen the key before resuming
          # the download.
          if self.provider == 's3':
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0)
          else:  # self.provider == 'gs'
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0, hash_algs=hash_algs)
      except BotoResumableDownloadException, e:
        if (e.disposition ==
            ResumableTransferDisposition.ABORT_CUR_PROCESS):
          raise ResumableDownloadException(e.message)
        else:
          if debug >= 1:
            self.logger.info('Caught ResumableDownloadException (%s) - will '
                             'retry', e.message)

      # At this point we had a retryable failure; see if we made progress.
      start_byte = fp.tell()
      if start_byte > last_progress_byte:
        last_progress_byte = start_byte
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later.')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      if debug >= 1:
        self.logger.info(
            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
            'seconds before re-trying', progress_less_iterations,
            sleep_time_secs)
      time.sleep(sleep_time_secs)
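
  # Comment-only note on the sleep computation above: it is a capped,
  # jittered exponential backoff. After N consecutive progress-less
  # iterations the sleep is drawn uniformly from [0, 2**N) seconds and
  # clamped to GetMaxRetryDelay(); e.g. up to 2s after the first such
  # failure, 4s after the second, and 8s after the third.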

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    if canned_acl:
      canned_acls = object_uri.canned_acls()
      if canned_acl not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % canned_acl)
      object_uri.set_acl(canned_acl, object_uri.object_name)

    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)
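
  # Comment-only example of the metadata diff built above (header names are
  # illustrative): given meta_headers of
  # {'x-goog-meta-color': 'blue', 'Cache-Control': None}, the call becomes
  # set_metadata({'x-goog-meta-color': 'blue'}, set(['Cache-Control']), ...),
  # i.e. None-valued headers are removed from the object rather than set.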

  def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
                           canned_acl=None, progress_callback=None,
                           headers=None):
    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
                                   cb=progress_callback, headers=headers)

  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
                              progress_callback=None, headers=None):
    if dst_uri.get_provider().supports_chunked_transfer():
      dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
                                       cb=progress_callback, headers=headers)
    else:
      # Provider doesn't support chunked transfer, so copy to a temporary
      # file.
      (temp_fh, temp_path) = tempfile.mkstemp()
      try:
        with open(temp_path, 'wb') as out_fp:
          stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
          while stream_bytes:
            out_fp.write(stream_bytes)
            stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
        with open(temp_path, 'rb') as in_fp:
          dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
                                         headers=headers)
      finally:
        os.close(temp_fh)
        os.unlink(temp_path)

  def _PerformResumableUpload(self, key, upload_stream, upload_size,
                              tracker_callback, canned_acl=None,
                              serialization_data=None, progress_callback=None,
                              headers=None):
    resumable_upload = BotoResumableUpload(
        tracker_callback, self.logger, resume_url=serialization_data)
    resumable_upload.SendFile(key, upload_stream, upload_size,
                              canned_acl=canned_acl, cb=progress_callback,
                              headers=headers)

  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs).
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))

    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri
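
  # Comment-only example of the header assembly above: when both checksums
  # are present, _UploadSetup emits a single combined header of the form
  #
  #   x-goog-hash: crc32c=<base64 crc32c>,md5=<base64 md5>
  #
  # (crc32c first, since it is appended before the md5 branch runs).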
789 """ 790 try: 791 # The XML API does not support if-generation-match for GET requests. 792 # Therefore, if the object gets overwritten before the ACL and get_key 793 # operations, the best we can do is warn that it happened. 794 self._SetObjectAcl(object_metadata, dst_uri) 795 return self._BotoKeyToObject(dst_uri.get_key(), fields=fields) 796 except boto.exception.InvalidUriError as e: 797 if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)): 798 raise CommandException('\n'.join(textwrap.wrap( 799 'Uploaded object (%s) was deleted or overwritten immediately ' 800 'after it was uploaded. This can happen if you attempt to upload ' 801 'to the same object multiple times concurrently.' % dst_uri.uri))) 802 else: 803 raise 804 805 def _SetObjectAcl(self, object_metadata, dst_uri): 806 """Sets the ACL (if present in object_metadata) on an uploaded object.""" 807 if object_metadata.acl: 808 boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl) 809 dst_uri.set_xml_acl(boto_acl.to_xml()) 810 elif self.provider == 's3': 811 s3_acl = S3MarkerAclFromObjectMetadata(object_metadata) 812 if s3_acl: 813 dst_uri.set_xml_acl(s3_acl) 814 815 def UploadObjectResumable( 816 self, upload_stream, object_metadata, canned_acl=None, preconditions=None, 817 provider=None, fields=None, size=None, serialization_data=None, 818 tracker_callback=None, progress_callback=None): 819 """See CloudApi class for function doc strings.""" 820 if self.provider == 's3': 821 # Resumable uploads are not supported for s3. 822 return self.UploadObject( 823 upload_stream, object_metadata, canned_acl=canned_acl, 824 preconditions=preconditions, fields=fields, size=size) 825 headers, dst_uri = self._UploadSetup(object_metadata, 826 preconditions=preconditions) 827 if not tracker_callback: 828 raise ArgumentException('No tracker callback function set for ' 829 'resumable upload of %s' % dst_uri) 830 try: 831 self._PerformResumableUpload(dst_uri.new_key(headers=headers), 832 upload_stream, size, tracker_callback, 833 canned_acl=canned_acl, 834 serialization_data=serialization_data, 835 progress_callback=progress_callback, 836 headers=headers) 837 return self._HandleSuccessfulUpload(dst_uri, object_metadata, 838 fields=fields) 839 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 840 not_found_exception = CreateNotFoundExceptionForObjectWrite( 841 self.provider, object_metadata.bucket) 842 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, 843 object_name=object_metadata.name, 844 not_found_exception=not_found_exception) 845 846 def UploadObjectStreaming(self, upload_stream, object_metadata, 847 canned_acl=None, progress_callback=None, 848 preconditions=None, provider=None, fields=None): 849 """See CloudApi class for function doc strings.""" 850 headers, dst_uri = self._UploadSetup(object_metadata, 851 preconditions=preconditions) 852 853 try: 854 self._PerformStreamingUpload( 855 dst_uri, upload_stream, canned_acl=canned_acl, 856 progress_callback=progress_callback, headers=headers) 857 return self._HandleSuccessfulUpload(dst_uri, object_metadata, 858 fields=fields) 859 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 860 not_found_exception = CreateNotFoundExceptionForObjectWrite( 861 self.provider, object_metadata.bucket) 862 self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket, 863 object_name=object_metadata.name, 864 not_found_exception=not_found_exception) 865 866 def UploadObject(self, upload_stream, object_metadata, canned_acl=None, 867 preconditions=None, size=None, 

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   preconditions=None, size=None, progress_callback=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      md5 = None
      if object_metadata.md5Hash:
        md5 = []
        # boto expects hex at index 0, base64 at index 1.
        md5.append(Base64ToHexHash(object_metadata.md5Hash))
        md5.append(object_metadata.md5Hash.strip('\n"\''))
      self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
                                canned_acl=canned_acl,
                                progress_callback=progress_callback,
                                headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)
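
  # Comment-only example of boto's md5 convention used above: for an
  # object_metadata.md5Hash of 'rL0Y20zC+Fzt72VPzMSk2A==' (base64 of the MD5
  # of 'foo'), the list passed to boto is
  #
  #   ['acbd18db4cc2f85cedef654fccc4a4d8', 'rL0Y20zC+Fzt72VPzMSk2A==']
  #
  # i.e. hex digest at index 0 and base64 digest at index 1.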

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    headers = {}
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    uri = self._StorageUriForObject(bucket_name, object_name,
                                    generation=generation)
    try:
      uri.delete_key(validate=False, headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider

    if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
    dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
                                        dst_obj_metadata.name)

    # Usually it's okay to treat version_id and generation as
    # the same, but in this case the underlying boto call determines the
    # provider based on the presence of one or the other.
    src_version_id = None
    if self.provider == 's3':
      src_version_id = src_generation
      src_generation = None

    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    if canned_acl:
      headers[dst_uri.get_provider().acl_header] = canned_acl

    preserve_acl = True if dst_obj_metadata.acl else False
    if self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
      if s3_acl:
        preserve_acl = True

    try:
      new_key = dst_uri.copy_key(
          src_obj_metadata.bucket, src_obj_metadata.name,
          preserve_acl=preserve_acl, headers=headers,
          src_version_id=src_version_id, src_generation=src_generation)

      return self._BotoKeyToObject(new_key, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, dst_obj_metadata.bucket, src_provider=self.provider,
          src_bucket_name=src_obj_metadata.bucket,
          src_object_name=src_obj_metadata.name, src_generation=src_generation)
      self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket,
                                       object_name=dst_obj_metadata.name,
                                       not_found_exception=not_found_exception)

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    ValidateDstObjectMetadata(dst_obj_metadata)

    dst_obj_name = dst_obj_metadata.name
    dst_obj_metadata.name = None
    dst_bucket_name = dst_obj_metadata.bucket
    dst_obj_metadata.bucket = None
    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    if not dst_obj_metadata.contentType:
      dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
    headers['content-type'] = dst_obj_metadata.contentType
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)

    src_components = []
    for src_obj in src_objs_metadata:
      src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
                                          generation=src_obj.generation)
      src_components.append(src_uri)

    try:
      dst_uri.compose(src_components, headers=headers)

      return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
                                    fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      # Use the saved names; dst_obj_metadata's bucket and name were cleared
      # above for header construction.
      self._TranslateExceptionAndRaise(e, bucket_name=dst_bucket_name,
                                       object_name=dst_obj_name)

  def _AddPreconditionsToHeaders(self, preconditions, headers):
    """Adds preconditions (if any) to headers."""
    if preconditions and self.provider == 'gs':
      if preconditions.gen_match is not None:
        headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
      if preconditions.meta_gen_match is not None:
        headers['x-goog-if-metageneration-match'] = str(
            preconditions.meta_gen_match)

  def _AddApiVersionToHeaders(self, headers):
    if self.provider == 'gs':
      headers['x-goog-api-version'] = self.api_version

  def _GetMD5FromETag(self, src_etag):
    """Returns an MD5 from the etag iff the etag is a valid MD5 hash.

    Args:
      src_etag: Object etag for which to return the MD5.

    Returns:
      MD5 in hex string format, or None.
    """
    if src_etag and MD5_REGEX.search(src_etag):
      return src_etag.strip('"\'').lower()
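
  # Comment-only examples for _GetMD5FromETag above: an etag of
  # '"acbd18db4cc2f85cedef654fccc4a4d8"' matches MD5_REGEX and is returned as
  # 'acbd18db4cc2f85cedef654fccc4a4d8', while a multipart-style S3 etag such
  # as '"acbd18db4cc2f85cedef654fccc4a4d8-12"' does not match and yields None.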
1013 """ 1014 if src_etag and MD5_REGEX.search(src_etag): 1015 return src_etag.strip('"\'').lower() 1016 1017 def _StorageUriForBucket(self, bucket): 1018 """Returns a boto storage_uri for the given bucket name. 1019 1020 Args: 1021 bucket: Bucket name (string). 1022 1023 Returns: 1024 Boto storage_uri for the bucket. 1025 """ 1026 return boto.storage_uri( 1027 '%s://%s' % (self.provider, bucket), 1028 suppress_consec_slashes=False, 1029 bucket_storage_uri_class=self.bucket_storage_uri_class, 1030 debug=self.debug) 1031 1032 def _StorageUriForObject(self, bucket, object_name, generation=None): 1033 """Returns a boto storage_uri for the given object. 1034 1035 Args: 1036 bucket: Bucket name (string). 1037 object_name: Object name (string). 1038 generation: Generation or version_id of object. If None, live version 1039 of the object is used. 1040 1041 Returns: 1042 Boto storage_uri for the object. 1043 """ 1044 uri_string = '%s://%s/%s' % (self.provider, bucket, object_name) 1045 if generation: 1046 uri_string += '#%s' % generation 1047 return boto.storage_uri( 1048 uri_string, suppress_consec_slashes=False, 1049 bucket_storage_uri_class=self.bucket_storage_uri_class, 1050 debug=self.debug) 1051 1052 def _GetBotoKey(self, bucket_name, object_name, generation=None): 1053 """Gets the boto key for an object. 1054 1055 Args: 1056 bucket_name: Bucket containing the object. 1057 object_name: Object name. 1058 generation: Generation or version of the object to retrieve. 1059 1060 Returns: 1061 Boto key for the object. 1062 """ 1063 object_uri = self._StorageUriForObject(bucket_name, object_name, 1064 generation=generation) 1065 try: 1066 key = object_uri.get_key() 1067 if not key: 1068 raise CreateObjectNotFoundException('404', self.provider, 1069 bucket_name, object_name, 1070 generation=generation) 1071 return key 1072 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 1073 self._TranslateExceptionAndRaise(e, bucket_name=bucket_name, 1074 object_name=object_name, 1075 generation=generation) 1076 1077 def _ListToGetFields(self, list_fields=None): 1078 """Removes 'items/' from the input fields and converts it to a set. 1079 1080 This way field sets requested for ListBucket/ListObject can be used in 1081 _BotoBucketToBucket and _BotoKeyToObject calls. 1082 1083 Args: 1084 list_fields: Iterable fields usable in ListBucket/ListObject calls. 1085 1086 Returns: 1087 Set of fields usable in GetBucket/GetObject or 1088 _BotoBucketToBucket/_BotoKeyToObject calls. 1089 """ 1090 if list_fields: 1091 get_fields = set() 1092 for field in list_fields: 1093 if field in ['kind', 'nextPageToken', 'prefixes']: 1094 # These are not actually object / bucket metadata fields. 1095 # They are fields specific to listing, so we don't consider them. 1096 continue 1097 get_fields.add(re.sub(r'items/', '', field)) 1098 return get_fields 1099 1100 # pylint: disable=too-many-statements 1101 def _BotoBucketToBucket(self, bucket, fields=None): 1102 """Constructs an apitools Bucket from a boto bucket. 1103 1104 Args: 1105 bucket: Boto bucket. 1106 fields: If present, construct the apitools Bucket with only this set of 1107 metadata fields. 1108 1109 Returns: 1110 apitools Bucket. 
1111 """ 1112 bucket_uri = self._StorageUriForBucket(bucket.name) 1113 1114 cloud_api_bucket = apitools_messages.Bucket(name=bucket.name, 1115 id=bucket.name) 1116 headers = {} 1117 self._AddApiVersionToHeaders(headers) 1118 if self.provider == 'gs': 1119 if not fields or 'storageClass' in fields: 1120 if hasattr(bucket, 'get_storage_class'): 1121 cloud_api_bucket.storageClass = bucket.get_storage_class() 1122 if not fields or 'acl' in fields: 1123 for acl in AclTranslation.BotoBucketAclToMessage( 1124 bucket.get_acl(headers=headers)): 1125 try: 1126 cloud_api_bucket.acl.append(acl) 1127 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 1128 translated_exception = self._TranslateBotoException( 1129 e, bucket_name=bucket.name) 1130 if (translated_exception and 1131 isinstance(translated_exception, 1132 AccessDeniedException)): 1133 # JSON API doesn't differentiate between a blank ACL list 1134 # and an access denied, so this is intentionally left blank. 1135 pass 1136 else: 1137 self._TranslateExceptionAndRaise(e, bucket_name=bucket.name) 1138 if not fields or 'cors' in fields: 1139 try: 1140 boto_cors = bucket_uri.get_cors() 1141 cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors) 1142 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 1143 self._TranslateExceptionAndRaise(e, bucket_name=bucket.name) 1144 if not fields or 'defaultObjectAcl' in fields: 1145 for acl in AclTranslation.BotoObjectAclToMessage( 1146 bucket.get_def_acl(headers=headers)): 1147 try: 1148 cloud_api_bucket.defaultObjectAcl.append(acl) 1149 except TRANSLATABLE_BOTO_EXCEPTIONS, e: 1150 translated_exception = self._TranslateBotoException( 1151 e, bucket_name=bucket.name) 1152 if (translated_exception and 1153 isinstance(translated_exception, 1154 AccessDeniedException)): 1155 # JSON API doesn't differentiate between a blank ACL list 1156 # and an access denied, so this is intentionally left blank. 

  # pylint: disable=too-many-statements
  def _BotoBucketToBucket(self, bucket, fields=None):
    """Constructs an apitools Bucket from a boto bucket.

    Args:
      bucket: Boto bucket.
      fields: If present, construct the apitools Bucket with only this set of
          metadata fields.

    Returns:
      apitools Bucket.
    """
    bucket_uri = self._StorageUriForBucket(bucket.name)

    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
                                                id=bucket.name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      if not fields or 'storageClass' in fields:
        if hasattr(bucket, 'get_storage_class'):
          cloud_api_bucket.storageClass = bucket.get_storage_class()
      if not fields or 'acl' in fields:
        for acl in AclTranslation.BotoBucketAclToMessage(
            bucket.get_acl(headers=headers)):
          try:
            cloud_api_bucket.acl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'cors' in fields:
        try:
          boto_cors = bucket_uri.get_cors()
          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'defaultObjectAcl' in fields:
        for acl in AclTranslation.BotoObjectAclToMessage(
            bucket.get_def_acl(headers=headers)):
          try:
            cloud_api_bucket.defaultObjectAcl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            log_object_prefix_present = 'LogObjectPrefix' in logging_config
            log_bucket_present = 'LogBucket' in logging_config
            if log_object_prefix_present or log_bucket_present:
              cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
              if log_object_prefix_present:
                cloud_api_bucket.logging.logObjectPrefix = (
                    logging_config['LogObjectPrefix'])
              if log_bucket_present:
                cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            main_page_suffix_present = 'MainPageSuffix' in website_config
            not_found_page_present = 'NotFoundPage' in website_config
            if main_page_suffix_present or not_found_page_present:
              cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
              if main_page_suffix_present:
                cloud_api_bucket.website.mainPageSuffix = (
                    website_config['MainPageSuffix'])
              if not_found_page_present:
                cloud_api_bucket.website.notFoundPage = (
                    website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
    if not fields or 'location' in fields:
      cloud_api_bucket.location = bucket_uri.get_location()
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3 long bucket listing we do not support CORS, lifecycle, website,
    # and logging translation. The individual commands can be used to get
    # the XML equivalents for S3.
    return cloud_api_bucket
1228 """ 1229 custom_metadata = None 1230 if not fields or 'metadata' in fields: 1231 custom_metadata = self._TranslateBotoKeyCustomMetadata(key) 1232 cache_control = None 1233 if not fields or 'cacheControl' in fields: 1234 cache_control = getattr(key, 'cache_control', None) 1235 component_count = None 1236 if not fields or 'componentCount' in fields: 1237 component_count = getattr(key, 'component_count', None) 1238 content_disposition = None 1239 if not fields or 'contentDisposition' in fields: 1240 content_disposition = getattr(key, 'content_disposition', None) 1241 # Other fields like updated and ACL depend on the generation 1242 # of the object, so populate that regardless of whether it was requested. 1243 generation = self._TranslateBotoKeyGeneration(key) 1244 metageneration = None 1245 if not fields or 'metageneration' in fields: 1246 metageneration = self._TranslateBotoKeyMetageneration(key) 1247 updated = None 1248 # Translation code to avoid a dependency on dateutil. 1249 if not fields or 'updated' in fields: 1250 updated = self._TranslateBotoKeyTimestamp(key) 1251 etag = None 1252 if not fields or 'etag' in fields: 1253 etag = getattr(key, 'etag', None) 1254 if etag: 1255 etag = etag.strip('"\'') 1256 crc32c = None 1257 if not fields or 'crc32c' in fields: 1258 if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes: 1259 crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n') 1260 md5_hash = None 1261 if not fields or 'md5Hash' in fields: 1262 if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes: 1263 md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n') 1264 elif self._GetMD5FromETag(getattr(key, 'etag', None)): 1265 md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag)) 1266 elif self.provider == 's3': 1267 # S3 etags are MD5s for non-multi-part objects, but multi-part objects 1268 # (which include all objects >= 5 GB) have a custom checksum 1269 # implementation that is not currently supported by gsutil. 1270 self.logger.warn( 1271 'Non-MD5 etag (%s) present for key %s, data integrity checks are ' 1272 'not possible.', key.etag, key) 1273 1274 # Serialize the boto key in the media link if it is requested. This 1275 # way we can later access the key without adding an HTTP call. 1276 media_link = None 1277 if not fields or 'mediaLink' in fields: 1278 media_link = binascii.b2a_base64( 1279 pickle.dumps(key, pickle.HIGHEST_PROTOCOL)) 1280 size = None 1281 if not fields or 'size' in fields: 1282 size = key.size or 0 1283 storage_class = None 1284 if not fields or 'storageClass' in fields: 1285 storage_class = getattr(key, 'storage_class', None) 1286 1287 cloud_api_object = apitools_messages.Object( 1288 bucket=key.bucket.name, 1289 name=key.name, 1290 size=size, 1291 contentEncoding=key.content_encoding, 1292 contentLanguage=key.content_language, 1293 contentType=key.content_type, 1294 cacheControl=cache_control, 1295 contentDisposition=content_disposition, 1296 etag=etag, 1297 crc32c=crc32c, 1298 md5Hash=md5_hash, 1299 generation=generation, 1300 metageneration=metageneration, 1301 componentCount=component_count, 1302 updated=updated, 1303 metadata=custom_metadata, 1304 mediaLink=media_link, 1305 storageClass=storage_class) 1306 1307 # Remaining functions amend cloud_api_object. 

  def _TranslateBotoKeyCustomMetadata(self, key):
    """Populates an apitools message from custom metadata in the boto key."""
    custom_metadata = None
    if getattr(key, 'metadata', None):
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      for k, v in key.metadata.iteritems():
        if k.lower() == 'content-language':
          # Work around content-language being inserted into custom metadata.
          continue
        custom_metadata.additionalProperties.append(
            apitools_messages.Object.MetadataValue.AdditionalProperty(
                key=k, value=v))
    return custom_metadata

  def _TranslateBotoKeyGeneration(self, key):
    """Returns the generation/version_id number from the boto key if present."""
    generation = None
    if self.provider == 'gs':
      if getattr(key, 'generation', None):
        generation = long(key.generation)
    elif self.provider == 's3':
      if getattr(key, 'version_id', None):
        generation = EncodeStringAsLong(key.version_id)
    return generation

  def _TranslateBotoKeyMetageneration(self, key):
    """Returns the metageneration number from the boto key if present."""
    metageneration = None
    if self.provider == 'gs':
      if getattr(key, 'metageneration', None):
        metageneration = long(key.metageneration)
    return metageneration

  def _TranslateBotoKeyTimestamp(self, key):
    """Parses the timestamp from the boto key into a datetime object.

    This avoids a dependency on dateutil.

    Args:
      key: Boto key to get the timestamp from.

    Returns:
      datetime object if the string is parsed successfully, None otherwise.
    """
    if key.last_modified:
      if '.' in key.last_modified:
        key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
      else:
        key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
      fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
      try:
        return datetime.datetime.strptime(key_us_timestamp, fmt)
      except ValueError:
        try:
          # Try the alternate format.
          fmt = '%a, %d %b %Y %H:%M:%S %Z'
          return datetime.datetime.strptime(key.last_modified, fmt)
        except ValueError:
          # Could not parse the time; leave updated as None.
          return None
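
  # Comment-only examples for the timestamp parsing above: a GCS-style value
  # of '2013-02-14T23:01:37.123Z' is padded to microsecond precision
  # ('2013-02-14T23:01:37.123000Z') before strptime, while an HTTP-style
  # value such as 'Sat, 16 Feb 2013 00:12:24 GMT' is handled by the alternate
  # format in the fallback branch.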

  def _TranslateDeleteMarker(self, key, cloud_api_object):
    """Marks deleted objects with a metadata value (for S3 compatibility)."""
    if isinstance(key, DeleteMarker):
      if not cloud_api_object.metadata:
        cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
        cloud_api_object.metadata.additionalProperties = []
      cloud_api_object.metadata.additionalProperties.append(
          apitools_messages.Object.MetadataValue.AdditionalProperty(
              key=S3_DELETE_MARKER_GUID, value=True))

  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        # key.get_acl() does not support versioning, so we need to use
        # storage_uri to ensure we're getting the versioned ACL.
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different, and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present. The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise

  def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
                                  generation=None, not_found_exception=None):
    """Translates a Boto exception and raises the translated or original value.

    Args:
      e: Any Exception.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.
      not_found_exception: Optional exception to raise in the not-found case.

    Raises:
      Translated CloudApi exception, or the original exception if it was not
      translatable.
    """
    translated_exception = self._TranslateBotoException(
        e, bucket_name=bucket_name, object_name=object_name,
        generation=generation, not_found_exception=not_found_exception)
    if translated_exception:
      raise translated_exception
    else:
      raise
1455 """ 1456 if isinstance(e, boto.exception.StorageResponseError): 1457 if e.status == 400: 1458 return BadRequestException(e.code, status=e.status, body=e.body) 1459 elif e.status == 401 or e.status == 403: 1460 return AccessDeniedException(e.code, status=e.status, body=e.body) 1461 elif e.status == 404: 1462 if not_found_exception: 1463 # The exception is pre-constructed prior to translation; the HTTP 1464 # status code isn't available at that time. 1465 setattr(not_found_exception, 'status', e.status) 1466 return not_found_exception 1467 elif bucket_name: 1468 if object_name: 1469 return CreateObjectNotFoundException(e.status, self.provider, 1470 bucket_name, object_name, 1471 generation=generation) 1472 return CreateBucketNotFoundException(e.status, self.provider, 1473 bucket_name) 1474 return NotFoundException(e.message, status=e.status, body=e.body) 1475 1476 elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code: 1477 return NotEmptyException('BucketNotEmpty (%s)' % bucket_name, 1478 status=e.status, body=e.body) 1479 elif e.status == 410: 1480 # 410 errors should always cause us to start over - either the UploadID 1481 # has expired or there was a server-side problem that requires starting 1482 # the upload over from scratch. 1483 return ResumableUploadStartOverException(e.message) 1484 elif e.status == 412: 1485 return PreconditionException(e.code, status=e.status, body=e.body) 1486 if isinstance(e, boto.exception.StorageCreateError): 1487 return ServiceException('Bucket already exists.', status=e.status, 1488 body=e.body) 1489 1490 if isinstance(e, boto.exception.BotoServerError): 1491 return ServiceException(e.message, status=e.status, body=e.body) 1492 1493 if isinstance(e, boto.exception.InvalidUriError): 1494 # Work around textwrap when searching for this string. 1495 if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)): 1496 return NotFoundException(e.message, status=404) 1497 return InvalidUrlError(e.message) 1498 1499 if isinstance(e, boto.exception.ResumableUploadException): 1500 if e.disposition == boto.exception.ResumableTransferDisposition.ABORT: 1501 return ResumableUploadAbortException(e.message) 1502 elif (e.disposition == 1503 boto.exception.ResumableTransferDisposition.START_OVER): 1504 return ResumableUploadStartOverException(e.message) 1505 else: 1506 return ResumableUploadException(e.message) 1507 1508 if isinstance(e, boto.exception.ResumableDownloadException): 1509 return ResumableDownloadException(e.message) 1510 1511 return None 1512 1513 # For function docstrings, see CloudApiDelegator class. 

  # For function docstrings, see CloudApiDelegator class.
  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if def_obj_acl:
        return uri.get_def_acl()
      else:
        return uri.get_acl()
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
                           def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if canned:
        # Validate the canned ACL name once; the same check applies whether
        # it is set as a default object ACL or as the object's ACL.
        canned_acls = uri.canned_acls()
        if acl_text not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % acl_text)
        if def_obj_acl:
          uri.set_def_acl(acl_text, uri.object_name)
        else:
          uri.set_acl(acl_text, uri.object_name)
      else:
        if def_obj_acl:
          uri.set_def_xml_acl(acl_text, uri.object_name)
        else:
          uri.set_xml_acl(acl_text, uri.object_name)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  # pylint: disable=catching-non-exception
  def XmlPassThroughSetCors(self, cors_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into Cors object.
    if storage_url.scheme == 's3':
      cors_obj = S3Cors()
    else:
      cors_obj = Cors()
    h = handler.XmlHandler(cors_obj, None)
    try:
      xml.sax.parseString(cors_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException('Requested CORS is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.set_cors(cors_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetCors(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    uri = boto.storage_uri(
        storage_url.url_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
    try:
      cors = uri.get_cors(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')
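
  # For illustration, a minimal hypothetical GCS-style CORS document of the
  # kind XmlPassThroughSetCors parses and XmlPassThroughGetCors pretty-prints
  # (origin and method values are assumed examples):
  #
  #   <CorsConfig>
  #     <Cors>
  #       <Origins><Origin>http://example.com</Origin></Origins>
  #       <Methods><Method>GET</Method></Methods>
  #     </Cors>
  #   </CorsConfig>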

  def XmlPassThroughGetLifecycle(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      lifecycle = uri.get_lifecycle_config(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into lifecycle object.
    if storage_url.scheme == 's3':
      lifecycle_obj = S3Lifecycle()
    else:
      lifecycle_obj = LifecycleConfig()
    h = handler.XmlHandler(lifecycle_obj, None)
    try:
      xml.sax.parseString(lifecycle_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException(
          'Requested lifecycle config is invalid: %s at line %s, column %s' %
          (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.configure_lifecycle(lifecycle_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetLogging(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      logging_config_xml = UnaryDictToXml(uri.get_logging_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(logging_config_xml).toprettyxml()

  def XmlPassThroughGetWebsite(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      web_config_xml = UnaryDictToXml(uri.get_website_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(web_config_xml).toprettyxml()
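
  # A minimal usage sketch for the pass-through methods above (assumes
  # 'xml_api' is an instance of this class and 'storage_url' is a bucket
  # StorageUrl; both names are illustrative):
  #
  #   lifecycle_xml = xml_api.XmlPassThroughGetLifecycle(storage_url)
  #   # ... edit the pretty-printed XML as needed ...
  #   xml_api.XmlPassThroughSetLifecycle(lifecycle_xml, storage_url)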