# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""

from __future__ import absolute_import

import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import threading
import time
import xml
from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions

import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix

from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin  # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateNotFoundExceptionForObjectWrite
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS

TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)

# pylint: disable=global-at-module-level
global boto_auth_initialized, boto_auth_initialized_lock
# If multiprocessing is available, these will be overridden to process-safe
# variables in InitializeMultiprocessingVariables.
boto_auth_initialized_lock = threading.Lock()
boto_auth_initialized = False

NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
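# Example (illustrative): '"d41d8cd98f00b204e9800998ecf8427e"' matches this
# pattern, while multi-part S3 etags of the form '"<hex>-2"' do not.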


def InitializeMultiprocessingVariables():  # pylint: disable=invalid-name
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  # pylint: disable=global-variable-undefined
  global boto_auth_initialized, boto_auth_initialized_lock
  boto_auth_initialized_lock = gslib.util.CreateLock()
  boto_auth_initialized = multiprocessing.Value('i', 0)


class DownloadProxyCallbackHandler(object):
  """Intermediary callback to keep track of the number of bytes downloaded."""

  def __init__(self, start_byte, callback):
    self._start_byte = start_byte
    self._callback = callback

  def call(self, bytes_downloaded, total_size):
    """Saves necessary data and then calls the given Cloud API callback.

    Args:
      bytes_downloaded: Number of bytes processed so far.
      total_size: Total size of the ongoing operation.
    """
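    # For example (illustrative): with start_byte=1048576, a boto report of
    # (524288, 2097152) is surfaced to the caller as (1572864, 2097152).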
    if self._callback:
      self._callback(self._start_byte + bytes_downloaded, total_size)


class BotoTranslation(CloudApi):
  """Boto-based XML translation implementation of gsutil Cloud API.

  This class takes gsutil Cloud API objects, translates them to XML service
  calls, and translates the results back into gsutil Cloud API objects for
  use by the caller.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0, trace_token=None):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Provider prefix describing cloud storage provider to connect to.
                'gs' and 's3' are supported. Function implementations ignore
                the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
      trace_token: Unused in this subclass.
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    # pylint: disable=global-variable-undefined, global-variable-not-assigned
    global boto_auth_initialized, boto_auth_initialized_lock
    with boto_auth_initialized_lock:
      ConfigureNoOpAuthIfNeeded()
      if isinstance(boto_auth_initialized, bool):
        boto_auth_initialized = True
      else:
        boto_auth_initialized.value = 1
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')
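    # On GCS requests, api_version is sent as the 'x-goog-api-version' header
    # (see _AddApiVersionToHeaders below).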

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
                                                            headers=headers),
                                      fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
    try:
      provider_uri = boto.storage_uri(
          '%s://' % self.provider,
          suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)

      buckets_iter = provider_uri.get_all_buckets(headers=headers)
      for bucket in buckets_iter:
        if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # can't successfully call them.
          continue
        yield self._BotoBucketToBucket(bucket, fields=get_fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if canned_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_acl)
        bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
      if canned_def_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_def_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
        bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
      if metadata.cors:
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty.  Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    return self.GetBucket(bucket_name, fields=fields)

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    yield_prefixes = fields is None or 'prefixes' in fields
    yield_objects = fields is None or any(
        field.startswith('items/') for field in fields)
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    try:
      for key in objects_iter:
        if yield_prefixes and isinstance(key, Prefix):
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        elif yield_objects:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by bucket
          # listing, we'll need to make a separate HTTP call for each object to
          # get its metadata and populate the remaining fields with the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    try:
      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
                                                    generation=generation),
                                   fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _CurryDigester(self, digester_object):
    """Curries a digester object into a form consumable by boto.

    Key instantiates its own digesters by calling hash_algs[alg]() [note there
    are no arguments to this function].  So in order to pass in our caught-up
    digesters during a resumable download, we need to pass the digester
    object but don't get to look it up based on the algorithm name.  Here we
    use a lambda to make lookup implicit.

    Args:
      digester_object: Input object to be returned by the created function.

    Returns:
      A function which when called will return the input object.
    """
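    # Illustrative example: given digesters = {'md5': md5_obj}, boto's later
    # zero-argument call hash_algs['md5']() returns md5_obj itself rather
    # than constructing a fresh digester.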
    return lambda: digester_object

  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # This implementation will get the object metadata first if we don't pass
    # it in via serialization_data.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
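    # Build the HTTP Range header for partial downloads. Examples
    # (illustrative): start_byte=5, end_byte=9 -> 'bytes=5-9';
    # start_byte=1024 -> 'bytes=1024-'; start_byte=-1024 -> 'bytes=-1024'
    # (a suffix range requesting the last 1024 bytes).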
    if end_byte is not None:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % (start_byte)
    elif start_byte < 0:
      headers['range'] = 'bytes=%s' % (start_byte)

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key.  This is coupled
    # with the implementation in _BotoKeyToObject.
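    # serialization_data, as produced by _BotoKeyToObject, is a JSON dict of
    # the form (illustrative): {'url': <base64 of the pickled boto key>,
    # 'total_size': <object size in bytes>}.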
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)

    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

    if total_size:
      num_progress_callbacks = max(int(total_size) / TWO_MIB,
                                   XML_PROGRESS_CALLBACKS)
    else:
      num_progress_callbacks = XML_PROGRESS_CALLBACKS

    try:
      if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
        self._PerformResumableDownload(
            download_stream, start_byte, end_byte, key,
            headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
        self._PerformSimpleDownload(
            download_stream, key, progress_callback=progress_callback,
            num_progress_callbacks=num_progress_callbacks, headers=headers,
            hash_algs=hash_algs)
      else:
        raise ArgumentException('Unsupported DownloadStrategy: %s' %
                                download_strategy)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes.  To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])

  def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
                             num_progress_callbacks=XML_PROGRESS_CALLBACKS,
                             headers=None, hash_algs=None):
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)
    try:
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers,
                               hash_algs=hash_algs)
    except TypeError:  # s3 and mocks do not support hash_algs
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers)

  def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
                                headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded.
      start_byte: Start byte of the download.
      end_byte: End byte of the download.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
      callback: (optional) a callback function that will be called to report
           progress on the download.  The callback should accept two integer
           parameters.  The first integer represents the number of
           bytes that have been successfully transmitted from the service.  The
           second represents the total number of bytes that need to be
           transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
           parameter, this determines the granularity of the callback
           by defining the maximum number of times the callback will be
           called during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0
    last_progress_byte = start_byte

    while True:  # Retry as long as we're making progress.
      try:
        cb_handler = DownloadProxyCallbackHandler(start_byte, callback)
        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (start_byte, end_byte)

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        try:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0, hash_algs=hash_algs)
        except TypeError:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
      except retryable_exceptions, e:
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close and reopen the key before resuming
          # the download.
          if self.provider == 's3':
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0)
          else:  # self.provider == 'gs'
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0, hash_algs=hash_algs)
      except BotoResumableDownloadException, e:
        if (e.disposition ==
            ResumableTransferDisposition.ABORT_CUR_PROCESS):
          raise ResumableDownloadException(e.message)
        else:
          if debug >= 1:
            self.logger.info('Caught ResumableDownloadException (%s) - will '
                             'retry', e.message)
      # At this point we had a retryable failure; see if we made progress.
      start_byte = fp.tell()
      if start_byte > last_progress_byte:
        last_progress_byte = start_byte
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later.')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

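      # Exponential backoff with random jitter, capped at GetMaxRetryDelay().
      # For example, after 3 progress-less attempts the sleep is a random
      # value in [0, 8) seconds (subject to the cap).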
      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      if debug >= 1:
        self.logger.info(
            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
            'seconds before re-trying', progress_less_iterations,
            sleep_time_secs)
      time.sleep(sleep_time_secs)

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    if canned_acl:
      canned_acls = object_uri.canned_acls()
      if canned_acl not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % canned_acl)
      object_uri.set_acl(canned_acl, object_uri.object_name)

    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)

  def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
                           canned_acl=None, progress_callback=None,
                           headers=None):
    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
                                   cb=progress_callback, headers=headers)

  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
                              progress_callback=None, headers=None):
    if dst_uri.get_provider().supports_chunked_transfer():
      dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
                                       cb=progress_callback, headers=headers)
    else:
      # Provider doesn't support chunked transfer, so copy to a temporary
      # file.
      (temp_fh, temp_path) = tempfile.mkstemp()
      try:
        with open(temp_path, 'wb') as out_fp:
          stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
          while stream_bytes:
            out_fp.write(stream_bytes)
            stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
        with open(temp_path, 'rb') as in_fp:
          dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
                                         headers=headers)
      finally:
        os.close(temp_fh)
        os.unlink(temp_path)

  def _PerformResumableUpload(self, key, upload_stream, upload_size,
                              tracker_callback, canned_acl=None,
                              serialization_data=None, progress_callback=None,
                              headers=None):
    resumable_upload = BotoResumableUpload(
        tracker_callback, self.logger, resume_url=serialization_data)
    resumable_upload.SendFile(key, upload_stream, upload_size,
                              canned_acl=canned_acl, cb=progress_callback,
                              headers=headers)

  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs)
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))
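    # At this point 'x-goog-hash', if set, holds comma-separated base64
    # digests, e.g. (illustrative for empty content):
    # 'crc32c=AAAAAA==,md5=1B2M2Y8AsgTpgAmY7PhCfg=='.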

    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri

  def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
    """Set ACLs on an uploaded object and return its metadata.

    Args:
      dst_uri: Generation-specific StorageUri describing the object.
      object_metadata: Metadata for the object, including an ACL if applicable.
      fields: If present, return only these Object metadata fields.

    Returns:
      gsutil Cloud API Object metadata.

    Raises:
      CommandException if the object was overwritten / deleted concurrently.
    """
    try:
      # The XML API does not support if-generation-match for GET requests.
      # Therefore, if the object gets overwritten before the ACL and get_key
      # operations, the best we can do is warn that it happened.
      self._SetObjectAcl(object_metadata, dst_uri)
      return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
    except boto.exception.InvalidUriError as e:
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        raise CommandException('\n'.join(textwrap.wrap(
            'Uploaded object (%s) was deleted or overwritten immediately '
            'after it was uploaded. This can happen if you attempt to upload '
            'to the same object multiple times concurrently.' % dst_uri.uri)))
      else:
        raise

  def _SetObjectAcl(self, object_metadata, dst_uri):
    """Sets the ACL (if present in object_metadata) on an uploaded object."""
    if object_metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
      dst_uri.set_xml_acl(boto_acl.to_xml())
    elif self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
      if s3_acl:
        dst_uri.set_xml_acl(s3_acl)

  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
      provider=None, fields=None, size=None, serialization_data=None,
      tracker_callback=None, progress_callback=None):
    """See CloudApi class for function doc strings."""
    if self.provider == 's3':
      # Resumable uploads are not supported for s3.
      return self.UploadObject(
          upload_stream, object_metadata, canned_acl=canned_acl,
          preconditions=preconditions, fields=fields, size=size)
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)
    if not tracker_callback:
      raise ArgumentException('No tracker callback function set for '
                              'resumable upload of %s' % dst_uri)
    try:
      self._PerformResumableUpload(dst_uri.new_key(headers=headers),
                                   upload_stream, size, tracker_callback,
                                   canned_acl=canned_acl,
                                   serialization_data=serialization_data,
                                   progress_callback=progress_callback,
                                   headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, progress_callback=None,
                            preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      self._PerformStreamingUpload(
          dst_uri, upload_stream, canned_acl=canned_acl,
          progress_callback=progress_callback, headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   preconditions=None, size=None, progress_callback=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      md5 = None
      if object_metadata.md5Hash:
        md5 = []
        # boto expects hex at index 0, base64 at index 1
        md5.append(Base64ToHexHash(object_metadata.md5Hash))
        md5.append(object_metadata.md5Hash.strip('\n"\''))
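        # Illustrative result for an empty file:
        # ['d41d8cd98f00b204e9800998ecf8427e', '1B2M2Y8AsgTpgAmY7PhCfg==']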
      self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
                                canned_acl=canned_acl,
                                progress_callback=progress_callback,
                                headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    headers = {}
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    uri = self._StorageUriForObject(bucket_name, object_name,
                                    generation=generation)
    try:
      uri.delete_key(validate=False, headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider

    if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
    dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
                                        dst_obj_metadata.name)

    # Usually it's okay to treat version_id and generation as
    # the same, but in this case the underlying boto call determines the
    # provider based on the presence of one or the other.
    src_version_id = None
    if self.provider == 's3':
      src_version_id = src_generation
      src_generation = None

    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    if canned_acl:
      headers[dst_uri.get_provider().acl_header] = canned_acl

    preserve_acl = True if dst_obj_metadata.acl else False
    if self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
      if s3_acl:
        preserve_acl = True

    try:
      new_key = dst_uri.copy_key(
          src_obj_metadata.bucket, src_obj_metadata.name,
          preserve_acl=preserve_acl, headers=headers,
          src_version_id=src_version_id, src_generation=src_generation)

      return self._BotoKeyToObject(new_key, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, dst_obj_metadata.bucket, src_provider=self.provider,
          src_bucket_name=src_obj_metadata.bucket,
          src_object_name=src_obj_metadata.name, src_generation=src_generation)
      self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket,
                                       object_name=dst_obj_metadata.name,
                                       not_found_exception=not_found_exception)

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    ValidateDstObjectMetadata(dst_obj_metadata)

    dst_obj_name = dst_obj_metadata.name
    dst_obj_metadata.name = None
    dst_bucket_name = dst_obj_metadata.bucket
    dst_obj_metadata.bucket = None
    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    if not dst_obj_metadata.contentType:
      dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
      headers['content-type'] = dst_obj_metadata.contentType
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)

    src_components = []
    for src_obj in src_objs_metadata:
      src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
                                          generation=src_obj.generation)
      src_components.append(src_uri)

    try:
      dst_uri.compose(src_components, headers=headers)

      return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
                                    fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
                                       dst_obj_metadata.name)

  def _AddPreconditionsToHeaders(self, preconditions, headers):
    """Adds preconditions (if any) to headers."""
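    # For example (illustrative), gen_match=5 and meta_gen_match=2 produce
    # {'x-goog-if-generation-match': '5',
    #  'x-goog-if-metageneration-match': '2'}.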
    if preconditions and self.provider == 'gs':
      if preconditions.gen_match is not None:
        headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
      if preconditions.meta_gen_match is not None:
        headers['x-goog-if-metageneration-match'] = str(
            preconditions.meta_gen_match)

  def _AddApiVersionToHeaders(self, headers):
    if self.provider == 'gs':
      headers['x-goog-api-version'] = self.api_version

  def _GetMD5FromETag(self, src_etag):
    """Returns an MD5 from the etag iff the etag is a valid MD5 hash.

    Args:
      src_etag: Object etag for which to return the MD5.

    Returns:
      MD5 in hex string format, or None.
    """
    if src_etag and MD5_REGEX.search(src_etag):
      return src_etag.strip('"\'').lower()

  def _StorageUriForBucket(self, bucket):
    """Returns a boto storage_uri for the given bucket name.

    Args:
      bucket: Bucket name (string).

    Returns:
      Boto storage_uri for the bucket.
    """
    return boto.storage_uri(
        '%s://%s' % (self.provider, bucket),
        suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _StorageUriForObject(self, bucket, object_name, generation=None):
    """Returns a boto storage_uri for the given object.

    Args:
      bucket: Bucket name (string).
      object_name: Object name (string).
      generation: Generation or version_id of object.  If None, live version
                  of the object is used.

    Returns:
      Boto storage_uri for the object.
    """
    uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
    if generation:
      uri_string += '#%s' % generation
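    # e.g. (illustrative) 'gs://my-bucket/obj.txt#1360887697105000' for a GCS
    # generation, or the version_id string after '#' for S3.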
    return boto.storage_uri(
        uri_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _GetBotoKey(self, bucket_name, object_name, generation=None):
    """Gets the boto key for an object.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      generation: Generation or version of the object to retrieve.

    Returns:
      Boto key for the object.
    """
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)
    try:
      key = object_uri.get_key()
      if not key:
        raise CreateObjectNotFoundException('404', self.provider,
                                            bucket_name, object_name,
                                            generation=generation)
      return key
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _ListToGetFields(self, list_fields=None):
    """Removes 'items/' from the input fields and converts it to a set.

    This way field sets requested for ListBucket/ListObject can be used in
    _BotoBucketToBucket and _BotoKeyToObject calls.

    Args:
      list_fields: Iterable fields usable in ListBucket/ListObject calls.

    Returns:
      Set of fields usable in GetBucket/GetObject or
      _BotoBucketToBucket/_BotoKeyToObject calls.
    """
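    # For example (illustrative):
    #   ['items/name', 'items/size', 'prefixes'] -> set(['name', 'size'])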
1090    if list_fields:
1091      get_fields = set()
1092      for field in list_fields:
1093        if field in ['kind', 'nextPageToken', 'prefixes']:
1094          # These are not actually object / bucket metadata fields.
1095          # They are fields specific to listing, so we don't consider them.
1096          continue
1097        get_fields.add(re.sub(r'items/', '', field))
1098      return get_fields
1099
1100  # pylint: disable=too-many-statements
1101  def _BotoBucketToBucket(self, bucket, fields=None):
1102    """Constructs an apitools Bucket from a boto bucket.
1103
1104    Args:
1105      bucket: Boto bucket.
1106      fields: If present, construct the apitools Bucket with only this set of
1107              metadata fields.
1108
1109    Returns:
1110      apitools Bucket.
1111    """
1112    bucket_uri = self._StorageUriForBucket(bucket.name)
1113
1114    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
1115                                                id=bucket.name)
1116    headers = {}
1117    self._AddApiVersionToHeaders(headers)
1118    if self.provider == 'gs':
1119      if not fields or 'storageClass' in fields:
1120        if hasattr(bucket, 'get_storage_class'):
1121          cloud_api_bucket.storageClass = bucket.get_storage_class()
1122      if not fields or 'acl' in fields:
1123        for acl in AclTranslation.BotoBucketAclToMessage(
1124            bucket.get_acl(headers=headers)):
1125          try:
1126            cloud_api_bucket.acl.append(acl)
1127          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1128            translated_exception = self._TranslateBotoException(
1129                e, bucket_name=bucket.name)
1130            if (translated_exception and
1131                isinstance(translated_exception,
1132                           AccessDeniedException)):
1133              # JSON API doesn't differentiate between a blank ACL list
1134              # and an access denied, so this is intentionally left blank.
1135              pass
1136            else:
1137              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
1138      if not fields or 'cors' in fields:
1139        try:
1140          boto_cors = bucket_uri.get_cors()
1141          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
1142        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1143          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
1144      if not fields or 'defaultObjectAcl' in fields:
1145        for acl in AclTranslation.BotoObjectAclToMessage(
1146            bucket.get_def_acl(headers=headers)):
1147          try:
1148            cloud_api_bucket.defaultObjectAcl.append(acl)
1149          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
1150            translated_exception = self._TranslateBotoException(
1151                e, bucket_name=bucket.name)
1152            if (translated_exception and
1153                isinstance(translated_exception,
1154                           AccessDeniedException)):
1155              # JSON API doesn't differentiate between a blank ACL list
1156              # and an access denied, so this is intentionally left blank.
1157              pass
1158            else:
1159              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            log_object_prefix_present = 'LogObjectPrefix' in logging_config
            log_bucket_present = 'LogBucket' in logging_config
            if log_object_prefix_present or log_bucket_present:
              cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
              if log_object_prefix_present:
                cloud_api_bucket.logging.logObjectPrefix = (
                    logging_config['LogObjectPrefix'])
              if log_bucket_present:
                cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            main_page_suffix_present = 'MainPageSuffix' in website_config
            not_found_page_present = 'NotFoundPage' in website_config
            if main_page_suffix_present or not_found_page_present:
              cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
              if main_page_suffix_present:
                cloud_api_bucket.website.mainPageSuffix = (
                    website_config['MainPageSuffix'])
              if not_found_page_present:
                cloud_api_bucket.website.notFoundPage = (
                    website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'location' in fields:
        cloud_api_bucket.location = bucket_uri.get_location()
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3, long-format bucket listing does not support CORS, lifecycle,
    # website, or logging translation. The individual commands can be used
    # to get the XML equivalents for S3.
    return cloud_api_bucket

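  # The `fields` convention used throughout these translation methods: None
  # means populate every attribute, otherwise only the named attributes are
  # fetched, avoiding extra HTTP round-trips.  A hedged sketch (`xml_api` is
  # an instance of this class, `boto_key` a boto Key):
  #
  #   obj = xml_api._BotoKeyToObject(boto_key, fields=['size', 'etag'])
  #   # obj.size and obj.etag are populated; expensive attributes such as
  #   # md5Hash, mediaLink and acl are left unset.
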
  def _BotoKeyToObject(self, key, fields=None):
    """Constructs an apitools Object from a boto key.

    Args:
      key: Boto key to construct Object from.
      fields: If present, construct the apitools Object with only this set of
              metadata fields.

    Returns:
      apitools Object corresponding to key.
    """
    custom_metadata = None
    if not fields or 'metadata' in fields:
      custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
    cache_control = None
    if not fields or 'cacheControl' in fields:
      cache_control = getattr(key, 'cache_control', None)
    component_count = None
    if not fields or 'componentCount' in fields:
      component_count = getattr(key, 'component_count', None)
    content_disposition = None
    if not fields or 'contentDisposition' in fields:
      content_disposition = getattr(key, 'content_disposition', None)
    # Other fields like updated and ACL depend on the generation
    # of the object, so populate that regardless of whether it was requested.
    generation = self._TranslateBotoKeyGeneration(key)
    metageneration = None
    if not fields or 'metageneration' in fields:
      metageneration = self._TranslateBotoKeyMetageneration(key)
    updated = None
    # Translation code to avoid a dependency on dateutil.
    if not fields or 'updated' in fields:
      updated = self._TranslateBotoKeyTimestamp(key)
    etag = None
    if not fields or 'etag' in fields:
      etag = getattr(key, 'etag', None)
      if etag:
        etag = etag.strip('"\'')
    crc32c = None
    if not fields or 'crc32c' in fields:
      if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
        crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
    md5_hash = None
    if not fields or 'md5Hash' in fields:
      if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
        md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
      elif self._GetMD5FromETag(getattr(key, 'etag', None)):
        md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag))
      elif self.provider == 's3':
        # S3 etags are MD5s for non-multi-part objects, but multi-part objects
        # (which include all objects >= 5 GB) have a custom checksum
        # implementation that is not currently supported by gsutil.
        self.logger.warn(
            'Non-MD5 etag (%s) present for key %s, data integrity checks are '
            'not possible.', key.etag, key)

    # Serialize the boto key in the media link if it is requested.  This
    # way we can later access the key without adding an HTTP call.
    media_link = None
    if not fields or 'mediaLink' in fields:
      media_link = binascii.b2a_base64(
          pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
    size = None
    if not fields or 'size' in fields:
      size = key.size or 0
    storage_class = None
    if not fields or 'storageClass' in fields:
      storage_class = getattr(key, 'storage_class', None)

    cloud_api_object = apitools_messages.Object(
        bucket=key.bucket.name,
        name=key.name,
        size=size,
        contentEncoding=key.content_encoding,
        contentLanguage=key.content_language,
        contentType=key.content_type,
        cacheControl=cache_control,
        contentDisposition=content_disposition,
        etag=etag,
        crc32c=crc32c,
        md5Hash=md5_hash,
        generation=generation,
        metageneration=metageneration,
        componentCount=component_count,
        updated=updated,
        metadata=custom_metadata,
        mediaLink=media_link,
        storageClass=storage_class)

    # Remaining functions amend cloud_api_object.
    self._TranslateDeleteMarker(key, cloud_api_object)
    if not fields or 'acl' in fields:
      generation_str = GenerationFromUrlAndString(
          StorageUrlFromString(self.provider), generation)
      self._TranslateBotoKeyAcl(key, cloud_api_object,
                                generation=generation_str)

    return cloud_api_object

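  # The mediaLink serialization above can be reversed to recover the boto key
  # without an extra HTTP call.  A minimal standalone sketch (a plain dict
  # stands in for a real boto Key):
  #
  #   import binascii, pickle
  #   media_link = binascii.b2a_base64(
  #       pickle.dumps({'name': 'obj'}, pickle.HIGHEST_PROTOCOL))
  #   restored = pickle.loads(binascii.a2b_base64(media_link))
  #   assert restored == {'name': 'obj'}
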
  def _TranslateBotoKeyCustomMetadata(self, key):
    """Populates an apitools message from custom metadata in the boto key."""
    custom_metadata = None
    if getattr(key, 'metadata', None):
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      for k, v in key.metadata.iteritems():
        if k.lower() == 'content-language':
          # Work around content-language being inserted into custom metadata.
          continue
        custom_metadata.additionalProperties.append(
            apitools_messages.Object.MetadataValue.AdditionalProperty(
                key=k, value=v))
    return custom_metadata

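  # Sketch of the filtering above using a plain dict (the real code builds
  # apitools AdditionalProperty messages rather than tuples):
  #
  #   metadata = {'color': 'blue', 'Content-Language': 'en'}
  #   props = [(k, v) for k, v in metadata.iteritems()
  #            if k.lower() != 'content-language']
  #   # props == [('color', 'blue')]; content-language is skipped because
  #   # boto surfaces it as custom metadata even though it is a standard
  #   # header.
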
  def _TranslateBotoKeyGeneration(self, key):
    """Returns the generation/version_id number from the boto key if present."""
    generation = None
    if self.provider == 'gs':
      if getattr(key, 'generation', None):
        generation = long(key.generation)
    elif self.provider == 's3':
      if getattr(key, 'version_id', None):
        generation = EncodeStringAsLong(key.version_id)
    return generation

  def _TranslateBotoKeyMetageneration(self, key):
    """Returns the metageneration number from the boto key if present."""
    metageneration = None
    if self.provider == 'gs':
      if getattr(key, 'metageneration', None):
        metageneration = long(key.metageneration)
    return metageneration

  def _TranslateBotoKeyTimestamp(self, key):
    """Parses the timestamp from the boto key into a datetime object.

    This avoids a dependency on dateutil.

    Args:
      key: Boto key to get timestamp from.

    Returns:
      datetime object if the timestamp is parsed successfully, None otherwise.
    """
    if key.last_modified:
      if '.' in key.last_modified:
        key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
      else:
        key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
      fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
      try:
        return datetime.datetime.strptime(key_us_timestamp, fmt)
      except ValueError:
        try:
          # Try the alternate RFC 1123-style format.
          fmt = '%a, %d %b %Y %H:%M:%S %Z'
          return datetime.datetime.strptime(key.last_modified, fmt)
        except ValueError:
          # Could not parse the time; leave updated as None.
          return None

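  # A minimal sketch of the padding logic above: GCS timestamps carry
  # millisecond precision, while strptime's %f expects microseconds, so three
  # zeros are appended before parsing.
  #
  #   import datetime
  #   ts = '2013-07-29T18:35:21.123Z'       # millisecond precision
  #   padded = ts.rstrip('Z') + '000Z'      # -> '2013-07-29T18:35:21.123000Z'
  #   dt = datetime.datetime.strptime(padded, '%Y-%m-%dT%H:%M:%S.%fZ')
  #   assert dt.microsecond == 123000
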
  def _TranslateDeleteMarker(self, key, cloud_api_object):
    """Marks deleted objects with a metadata value (for S3 compatibility)."""
    if isinstance(key, DeleteMarker):
      if not cloud_api_object.metadata:
        cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
        cloud_api_object.metadata.additionalProperties = []
      cloud_api_object.metadata.additionalProperties.append(
          apitools_messages.Object.MetadataValue.AdditionalProperty(
              key=S3_DELETE_MARKER_GUID, value=True))

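  # Downstream code can detect an S3 delete marker by scanning the object's
  # custom metadata for the marker GUID.  A hedged sketch (`obj` is an
  # apitools Object that passed through _TranslateDeleteMarker):
  #
  #   def _IsDeleteMarker(obj):
  #     return bool(obj.metadata) and any(
  #         prop.key == S3_DELETE_MARKER_GUID
  #         for prop in obj.metadata.additionalProperties)
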
  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        # key.get_acl() does not support versioning so we need to use
        # storage_uri to ensure we're getting the versioned ACL.
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present.  The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise

  def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
                                  generation=None, not_found_exception=None):
    """Translates a Boto exception and raises the translated or original value.

    Args:
      e: Any Exception.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.
      not_found_exception: Optional exception to raise in the not-found case.

    Raises:
      Translated CloudApi exception, or the original exception if it was not
      translatable.
    """
    translated_exception = self._TranslateBotoException(
        e, bucket_name=bucket_name, object_name=object_name,
        generation=generation, not_found_exception=not_found_exception)
    if translated_exception:
      raise translated_exception
    else:
      raise

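  # Typical call-site pattern for the helper above (sketch; `bucket` is a boto
  # Bucket): wrap each boto call so provider-specific errors surface as gsutil
  # Cloud API exceptions.
  #
  #   try:
  #     key = bucket.get_key(object_name)
  #   except TRANSLATABLE_BOTO_EXCEPTIONS, e:
  #     self._TranslateExceptionAndRaise(e, bucket_name=bucket.name,
  #                                      object_name=object_name)
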
  def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
                              generation=None, not_found_exception=None):
    """Translates boto exceptions into their gsutil Cloud API equivalents.

    Because the translation uses isinstance, subtypes must be checked before
    their parent classes.

    Args:
      e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.
      not_found_exception: Optional exception to raise in the not-found case.

    Returns:
      Translated CloudApi ServiceException for translatable exceptions, None
      otherwise.
    """
    if isinstance(e, boto.exception.StorageResponseError):
      if e.status == 400:
        return BadRequestException(e.code, status=e.status, body=e.body)
      elif e.status == 401 or e.status == 403:
        return AccessDeniedException(e.code, status=e.status, body=e.body)
      elif e.status == 404:
        if not_found_exception:
          # The exception is pre-constructed prior to translation; the HTTP
          # status code isn't available at that time.
          setattr(not_found_exception, 'status', e.status)
          return not_found_exception
        elif bucket_name:
          if object_name:
            return CreateObjectNotFoundException(e.status, self.provider,
                                                 bucket_name, object_name,
                                                 generation=generation)
          return CreateBucketNotFoundException(e.status, self.provider,
                                               bucket_name)
        return NotFoundException(e.message, status=e.status, body=e.body)
      elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
        return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                 status=e.status, body=e.body)
      elif e.status == 410:
        # 410 errors should always cause us to start over - either the UploadID
        # has expired or there was a server-side problem that requires starting
        # the upload over from scratch.
        return ResumableUploadStartOverException(e.message)
      elif e.status == 412:
        return PreconditionException(e.code, status=e.status, body=e.body)
    if isinstance(e, boto.exception.StorageCreateError):
      return ServiceException('Bucket already exists.', status=e.status,
                              body=e.body)

    if isinstance(e, boto.exception.BotoServerError):
      return ServiceException(e.message, status=e.status, body=e.body)

    if isinstance(e, boto.exception.InvalidUriError):
      # Work around textwrap when searching for this string.
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        return NotFoundException(e.message, status=404)
      return InvalidUrlError(e.message)

    if isinstance(e, boto.exception.ResumableUploadException):
      if e.disposition == boto.exception.ResumableTransferDisposition.ABORT:
        return ResumableUploadAbortException(e.message)
      elif (e.disposition ==
            boto.exception.ResumableTransferDisposition.START_OVER):
        return ResumableUploadStartOverException(e.message)
      else:
        return ResumableUploadException(e.message)

    if isinstance(e, boto.exception.ResumableDownloadException):
      return ResumableDownloadException(e.message)

    return None

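  # A hedged sketch of exercising the translation above directly (the
  # constructor arguments follow boto's BotoServerError(status, reason)):
  #
  #   import boto.exception
  #   err = boto.exception.GSResponseError(412, 'Precondition Failed')
  #   translated = self._TranslateBotoException(err)
  #   # isinstance(translated, PreconditionException) -> True, since status
  #   # 412 maps to PreconditionException in the chain above.
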
  # For function docstrings, see CloudApiDelegator class.
  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if def_obj_acl:
        return uri.get_def_acl()
      else:
        return uri.get_acl()
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
                           def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if canned:
        canned_acls = uri.canned_acls()
        if acl_text not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % acl_text)
        if def_obj_acl:
          uri.set_def_acl(acl_text, uri.object_name)
        else:
          uri.set_acl(acl_text, uri.object_name)
      else:
        if def_obj_acl:
          uri.set_def_xml_acl(acl_text, uri.object_name)
        else:
          uri.set_xml_acl(acl_text, uri.object_name)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

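  # Sketch of a canned-ACL call through this passthrough (`xml_api` is an
  # instance of this class; `gs_url` a StorageUrl for an object the caller
  # owns):
  #
  #   xml_api.XmlPassThroughSetAcl('public-read', gs_url, canned=True)
  #   acl_xml = xml_api.XmlPassThroughGetAcl(gs_url)
  #
  # A string not returned by uri.canned_acls() raises CommandException before
  # any request is sent.
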
  # pylint: disable=catching-non-exception
  def XmlPassThroughSetCors(self, cors_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into Cors object.
    if storage_url.scheme == 's3':
      cors_obj = S3Cors()
    else:
      cors_obj = Cors()
    h = handler.XmlHandler(cors_obj, None)
    try:
      xml.sax.parseString(cors_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException('Requested CORS is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.set_cors(cors_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

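  # The SAX pattern above can be exercised standalone: boto's XmlHandler feeds
  # parsed elements into the Cors object, and malformed XML surfaces as a
  # SAXParseException.  A hedged sketch assuming a gs-style CORS document:
  #
  #   import xml.sax
  #   from boto import handler
  #   from boto.gs.cors import Cors
  #   cors_obj = Cors()
  #   xml.sax.parseString(
  #       '<CorsConfig><Cors><Origins><Origin>*</Origin></Origins></Cors>'
  #       '</CorsConfig>', handler.XmlHandler(cors_obj, None))
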
  def XmlPassThroughGetCors(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    uri = boto.storage_uri(
        storage_url.url_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
    try:
      cors = uri.get_cors(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

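  # Pretty-printing sketch for the minidom step above:
  #
  #   import xml.dom.minidom
  #   doc = xml.dom.minidom.parseString('<a><b>text</b></a>')
  #   print doc.toprettyxml(indent='    ')
  #   # <?xml version="1.0" ?>
  #   # <a>
  #   #     <b>text</b>
  #   # </a>
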
  def XmlPassThroughGetLifecycle(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      lifecycle = uri.get_lifecycle_config(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into lifecycle object.
    if storage_url.scheme == 's3':
      lifecycle_obj = S3Lifecycle()
    else:
      lifecycle_obj = LifecycleConfig()
    h = handler.XmlHandler(lifecycle_obj, None)
    try:
      xml.sax.parseString(lifecycle_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException(
          'Requested lifecycle config is invalid: %s at line %s, column %s' %
          (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.configure_lifecycle(lifecycle_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetLogging(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      logging_config_xml = UnaryDictToXml(uri.get_logging_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(logging_config_xml).toprettyxml()

  def XmlPassThroughGetWebsite(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      web_config_xml = UnaryDictToXml(uri.get_website_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(web_config_xml).toprettyxml()