# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
# Copyright (c) 2010, Eucalyptus Systems, Inc.
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import xml.sax
import base64
from boto.compat import six, urllib
import time

from boto.auth import detect_potential_s3sigv4
import boto.utils
from boto.connection import AWSAuthConnection
from boto import handler
from boto.s3.bucket import Bucket
from boto.s3.key import Key
from boto.resultset import ResultSet
from boto.exception import BotoClientError, S3ResponseError


def check_lowercase_bucketname(n):
    """
    Bucket names must not contain uppercase characters. We check for
    this by appending a lowercase character and testing with islower().
    Note this also covers cases like numeric bucket names with dashes.

    >>> check_lowercase_bucketname("Aaaa")
    Traceback (most recent call last):
    ...
    BotoClientError: S3Error: Bucket names cannot contain upper-case
    characters when using either the sub-domain or virtual hosting calling
    format.

    >>> check_lowercase_bucketname("1234-5678-9123")
    True
    >>> check_lowercase_bucketname("abcdefg1234")
    True
    """
    if not (n + 'a').islower():
        raise BotoClientError("Bucket names cannot contain upper-case " \
            "characters when using either the sub-domain or virtual " \
            "hosting calling format.")
    return True


def assert_case_insensitive(f):
    """
    Decorator for ``get_bucket_server`` implementations: rejects bucket
    names with upper-case characters before building a host name, since
    DNS-based (sub-domain / vhost) addressing is case-insensitive.
    """
    def wrapper(*args, **kwargs):
        # args == (self, server, bucket); validate the bucket name only
        # when the full positional form is used.
        if len(args) == 3 and check_lowercase_bucketname(args[2]):
            pass
        return f(*args, **kwargs)
    return wrapper


class _CallingFormat(object):
    """
    Base class for the strategies used to map a (server, bucket, key)
    triple onto a host name and URL path.  Subclasses override
    ``get_bucket_server`` (and sometimes the path builders).
    """

    def get_bucket_server(self, server, bucket):
        # Overridden by subclasses; the base class contributes no host.
        return ''

    def build_url_base(self, connection, protocol, server, bucket, key=''):
        """Return ``protocol://host/path`` for the given bucket/key."""
        url_base = '%s://' % protocol
        url_base += self.build_host(server, bucket)
        url_base += connection.get_path(self.build_path_base(bucket, key))
        return url_base

    def build_host(self, server, bucket):
        """Return the host to contact; bare server when no bucket given."""
        if bucket == '':
            return server
        else:
            return self.get_bucket_server(server, bucket)

    def build_auth_path(self, bucket, key=''):
        """
        Return the path used for request signing, which always includes
        the bucket name regardless of calling format.
        """
        key = boto.utils.get_utf8_value(key)
        path = ''
        if bucket != '':
            path = '/' + bucket
        return path + '/%s' % urllib.parse.quote(key)

    def build_path_base(self, bucket, key=''):
        """Return the request path (bucket lives in the host name here)."""
        key = boto.utils.get_utf8_value(key)
        return '/%s' % urllib.parse.quote(key)


class SubdomainCallingFormat(_CallingFormat):
    """Addresses buckets as ``<bucket>.<server>`` (the S3 default)."""

    @assert_case_insensitive
    def get_bucket_server(self, server, bucket):
        return '%s.%s' % (bucket, server)


class VHostCallingFormat(_CallingFormat):
    """Addresses buckets via a CNAME: the bucket name *is* the host."""

    @assert_case_insensitive
    def get_bucket_server(self, server, bucket):
        return bucket


class OrdinaryCallingFormat(_CallingFormat):
    """Path-style addressing: bucket goes in the URL path, not the host."""

    def get_bucket_server(self, server, bucket):
        return server

    def build_path_base(self, bucket, key=''):
        key = boto.utils.get_utf8_value(key)
        path_base = '/'
        if bucket:
            path_base += "%s/" % bucket
        return path_base + urllib.parse.quote(key)


class ProtocolIndependentOrdinaryCallingFormat(OrdinaryCallingFormat):
    """Path-style addressing with scheme-relative (``//host/...``) URLs."""

    def build_url_base(self, connection, protocol, server, bucket, key=''):
        # Deliberately omits the protocol so the browser/client inherits
        # the scheme of the embedding page or request.
        url_base = '//'
        url_base += self.build_host(server, bucket)
        url_base += connection.get_path(self.build_path_base(bucket, key))
        return url_base


class Location(object):
    """Symbolic names for the S3 region/location constraints."""

    DEFAULT = ''  # US Classic Region
    EU = 'EU'
    USWest = 'us-west-1'
    USWest2 = 'us-west-2'
    SAEast = 'sa-east-1'
    APNortheast = 'ap-northeast-1'
    APSoutheast = 'ap-southeast-1'
    APSoutheast2 = 'ap-southeast-2'
    CNNorth1 = 'cn-north-1'


class NoHostProvided(object):
    # An identifying object to help determine whether the user provided a
    # ``host`` or not. Never instantiated.
    pass


class HostRequiredError(BotoClientError):
    # Raised when SigV4 is in use but no explicit ``host`` was supplied.
    pass


class S3Connection(AWSAuthConnection):
    """
    Connection to the S3 (or S3-compatible) storage service.  Provides
    bucket-level operations and URL/POST-form generation; key-level
    operations live on :class:`boto.s3.bucket.Bucket` and
    :class:`boto.s3.key.Key`.
    """

    DefaultHost = boto.config.get('s3', 'host', 's3.amazonaws.com')
    DefaultCallingFormat = boto.config.get('s3', 'calling_format', 'boto.s3.connection.SubdomainCallingFormat')
    # Template for the SigV2 query-string authentication parameters.
    QueryString = 'Signature=%s&Expires=%d&AWSAccessKeyId=%s'

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None,
                 host=NoHostProvided, debug=0, https_connection_factory=None,
                 calling_format=DefaultCallingFormat, path='/',
                 provider='aws', bucket_class=Bucket, security_token=None,
                 suppress_consec_slashes=True, anon=False,
                 validate_certs=None, profile_name=None):
        # Track whether the caller passed an explicit host: SigV4 regions
        # require one, and we can only check after ``super().__init__``.
        no_host_provided = False
        if host is NoHostProvided:
            no_host_provided = True
            host = self.DefaultHost
        # The calling format may be given as a dotted-path string
        # (e.g. from the boto config file) or as an instance.
        if isinstance(calling_format, six.string_types):
            calling_format = boto.utils.find_class(calling_format)()
        self.calling_format = calling_format
        self.bucket_class = bucket_class
        self.anon = anon
        super(S3Connection, self).__init__(host,
                aws_access_key_id, aws_secret_access_key,
                is_secure, port, proxy, proxy_port, proxy_user, proxy_pass,
                debug=debug, https_connection_factory=https_connection_factory,
                path=path, provider=provider, security_token=security_token,
                suppress_consec_slashes=suppress_consec_slashes,
                validate_certs=validate_certs, profile_name=profile_name)
        # We need to delay until after the call to ``super`` before checking
        # to see if SigV4 is in use.
        if no_host_provided:
            if 'hmac-v4-s3' in self._required_auth_capability():
                raise HostRequiredError(
                    "When using SigV4, you must specify a 'host' parameter."
                )

    @detect_potential_s3sigv4
    def _required_auth_capability(self):
        # Anonymous connections skip signing entirely.
        if self.anon:
            return ['anon']
        else:
            return ['s3']

    def __iter__(self):
        # Iterating a connection yields its buckets.
        for bucket in self.get_all_buckets():
            yield bucket

    def __contains__(self, bucket_name):
        # ``name in conn`` — True when the bucket exists and is reachable.
        return self.lookup(bucket_name) is not None

    def set_bucket_class(self, bucket_class):
        """
        Set the Bucket class associated with this bucket. By default, this
        would be the boto.s3.key.Bucket class but if you want to subclass that
        for some reason this allows you to associate your new class.

        :type bucket_class: class
        :param bucket_class: A subclass of Bucket that can be more specific
        """
        self.bucket_class = bucket_class

    def build_post_policy(self, expiration_time, conditions):
        """
        Taken from the AWS book Python examples and modified for use with boto

        :type expiration_time: time.struct_time
        :param expiration_time: UTC expiry of the policy document.

        :type conditions: list of str
        :param conditions: Pre-rendered JSON condition fragments.

        :rtype: str
        :return: The JSON policy document for a browser-based POST upload.
        """
        assert isinstance(expiration_time, time.struct_time), \
            'Policy document must include a valid expiration Time object'

        # Convert conditions object mappings to condition statements

        return '{"expiration": "%s",\n"conditions": [%s]}' % \
            (time.strftime(boto.utils.ISO8601, expiration_time), ",".join(conditions))

    def build_post_form_args(self, bucket_name, key, expires_in=6000,
                             acl=None, success_action_redirect=None,
                             max_content_length=None,
                             http_method='http', fields=None,
                             conditions=None, storage_class='STANDARD',
                             server_side_encryption=None):
        """
        Taken from the AWS book Python examples and modified for use with boto
        This only returns the arguments required for the post form, not the
        actual form. This does not return the file input field which also
        needs to be added

        :type bucket_name: string
        :param bucket_name: Bucket to submit to

        :type key: string
        :param key: Key name, optionally add $(unknown) to the end to
            attach the submitted filename

        :type expires_in: integer
        :param expires_in: Time (in seconds) before this expires, defaults
            to 6000

        :type acl: string
        :param acl: A canned ACL. One of:
            * private
            * public-read
            * public-read-write
            * authenticated-read
            * bucket-owner-read
            * bucket-owner-full-control

        :type success_action_redirect: string
        :param success_action_redirect: URL to redirect to on success

        :type max_content_length: integer
        :param max_content_length: Maximum size for this file

        :type http_method: string
        :param http_method: HTTP Method to use, "http" or "https"

        :type storage_class: string
        :param storage_class: Storage class to use for storing the object.
            Valid values: STANDARD | REDUCED_REDUNDANCY

        :type server_side_encryption: string
        :param server_side_encryption: Specifies server-side encryption
            algorithm to use when Amazon S3 creates an object.
            Valid values: None | AES256

        :rtype: dict
        :return: A dictionary containing field names/values as well as
            a url to POST to
        """
        if fields is None:
            fields = []
        if conditions is None:
            conditions = []
        expiration = time.gmtime(int(time.time() + expires_in))

        # Generate policy document
        conditions.append('{"bucket": "%s"}' % bucket_name)
        if key.endswith("$(unknown)"):
            # Browser will substitute the uploaded filename for $(unknown),
            # so only the prefix can be pinned in the policy.
            conditions.append('["starts-with", "$key", "%s"]' % key[:-len("$(unknown)")])
        else:
            conditions.append('{"key": "%s"}' % key)
        if acl:
            conditions.append('{"acl": "%s"}' % acl)
            fields.append({"name": "acl", "value": acl})
        if success_action_redirect:
            conditions.append('{"success_action_redirect": "%s"}' % success_action_redirect)
            fields.append({"name": "success_action_redirect", "value": success_action_redirect})
        if max_content_length:
            conditions.append('["content-length-range", 0, %i]' % max_content_length)

        if self.provider.security_token:
            fields.append({'name': 'x-amz-security-token',
                           'value': self.provider.security_token})
            conditions.append('{"x-amz-security-token": "%s"}' % self.provider.security_token)

        if storage_class:
            fields.append({'name': 'x-amz-storage-class',
                           'value': storage_class})
            conditions.append('{"x-amz-storage-class": "%s"}' % storage_class)

        if server_side_encryption:
            fields.append({'name': 'x-amz-server-side-encryption',
                           'value': server_side_encryption})
            conditions.append('{"x-amz-server-side-encryption": "%s"}' % server_side_encryption)

        policy = self.build_post_policy(expiration, conditions)

        # Add the base64-encoded policy document as the 'policy' field
        # NOTE(review): ``policy`` is a str; on Python 3 ``b64encode``
        # requires bytes and this raises TypeError.  Left unchanged because
        # the downstream ``sign_string`` input type must be confirmed
        # before encoding here — TODO verify against boto.auth.
        policy_b64 = base64.b64encode(policy)
        fields.append({"name": "policy", "value": policy_b64})

        # Add the AWS access key as the 'AWSAccessKeyId' field
        fields.append({"name": "AWSAccessKeyId",
                       "value": self.aws_access_key_id})

        # Add signature for encoded policy document as the
        # 'signature' field
        signature = self._auth_handler.sign_string(policy_b64)
        fields.append({"name": "signature", "value": signature})
        fields.append({"name": "key", "value": key})

        # HTTPS protocol will be used if the secure HTTP option is enabled.
        url = '%s://%s/' % (http_method,
                            self.calling_format.build_host(self.server_name(),
                                                           bucket_name))

        return {"action": url, "fields": fields}

    def generate_url_sigv4(self, expires_in, method, bucket='', key='',
                           headers=None, force_http=False,
                           response_headers=None, version_id=None,
                           iso_date=None):
        """
        Build a SigV4 presigned URL; used by ``generate_url`` when the
        auth handler advertises the 'hmac-v4-s3' capability.
        """
        path = self.calling_format.build_path_base(bucket, key)
        auth_path = self.calling_format.build_auth_path(bucket, key)
        host = self.calling_format.build_host(self.server_name(), bucket)

        # For presigned URLs we should ignore the port if it's HTTPS
        if host.endswith(':443'):
            host = host[:-4]

        params = {}
        if version_id is not None:
            params['VersionId'] = version_id

        http_request = self.build_base_http_request(method, path, auth_path,
                                                    headers=headers, host=host,
                                                    params=params)

        return self._auth_handler.presign(http_request, expires_in,
                                          iso_date=iso_date)

    def generate_url(self, expires_in, method, bucket='', key='', headers=None,
                     query_auth=True, force_http=False, response_headers=None,
                     expires_in_absolute=False, version_id=None):
        """
        Build a (possibly signed) URL for the given bucket/key.  Delegates
        to ``generate_url_sigv4`` when SigV4 signing is active; otherwise
        performs SigV2 query-string signing inline.
        """
        if self._auth_handler.capability[0] == 'hmac-v4-s3':
            # Handle the special sigv4 case
            return self.generate_url_sigv4(expires_in, method, bucket=bucket,
                key=key, headers=headers, force_http=force_http,
                response_headers=response_headers, version_id=version_id)

        headers = headers or {}
        if expires_in_absolute:
            expires = int(expires_in)
        else:
            expires = int(time.time() + expires_in)
        auth_path = self.calling_format.build_auth_path(bucket, key)
        auth_path = self.get_path(auth_path)
        # optional version_id and response_headers need to be added to
        # the query param list.
        extra_qp = []
        if version_id is not None:
            extra_qp.append("versionId=%s" % version_id)
        if response_headers:
            for k, v in response_headers.items():
                extra_qp.append("%s=%s" % (k, urllib.parse.quote(v)))
        if self.provider.security_token:
            headers['x-amz-security-token'] = self.provider.security_token
        if extra_qp:
            delimiter = '?' if '?' not in auth_path else '&'
            auth_path += delimiter + '&'.join(extra_qp)
        c_string = boto.utils.canonical_string(method, auth_path, headers,
                                               expires, self.provider)
        b64_hmac = self._auth_handler.sign_string(c_string)
        encoded_canonical = urllib.parse.quote(b64_hmac, safe='')
        if query_auth:
            query_part = '?' + self.QueryString % (encoded_canonical, expires,
                                                   self.aws_access_key_id)
        else:
            query_part = ''
        if headers:
            hdr_prefix = self.provider.header_prefix
            for k, v in headers.items():
                if k.startswith(hdr_prefix):
                    # headers used for sig generation must be
                    # included in the url also.
                    extra_qp.append("%s=%s" % (k, urllib.parse.quote(v)))
        # ``extra_qp`` intentionally still holds the version/response-header
        # params from above: they were folded into the signed auth_path and
        # must also appear in the final URL.
        if extra_qp:
            delimiter = '?' if not query_part else '&'
            query_part += delimiter + '&'.join(extra_qp)
        if force_http:
            protocol = 'http'
            port = 80
        else:
            protocol = self.protocol
            port = self.port
        return self.calling_format.build_url_base(self, protocol,
                                                  self.server_name(port),
                                                  bucket, key) + query_part

    def get_all_buckets(self, headers=None):
        """
        Return a ResultSet of all buckets owned by these credentials.

        :type headers: dict
        :param headers: Additional headers to pass along with the request.
        """
        response = self.make_request('GET', headers=headers)
        body = response.read()
        if response.status > 300:
            raise self.provider.storage_response_error(
                response.status, response.reason, body)
        rs = ResultSet([('Bucket', self.bucket_class)])
        h = handler.XmlHandler(rs, self)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        xml.sax.parseString(body, h)
        return rs

    def get_canonical_user_id(self, headers=None):
        """
        Convenience method that returns the "CanonicalUserID" of the
        user who's credentials are associated with the connection.
        The only way to get this value is to do a GET request on the
        service which returns all buckets associated with the account.
        As part of that response, the canonical userid is returned.
        This method simply does all of that and then returns just the
        user id.

        :rtype: string
        :return: A string containing the canonical user id.
        """
        rs = self.get_all_buckets(headers=headers)
        return rs.owner.id

    def get_bucket(self, bucket_name, validate=True, headers=None):
        """
        Retrieves a bucket by name.

        If the bucket does not exist, an ``S3ResponseError`` will be raised. If
        you are unsure if the bucket exists or not, you can use the
        ``S3Connection.lookup`` method, which will either return a valid bucket
        or ``None``.

        If ``validate=False`` is passed, no request is made to the service (no
        charge/communication delay). This is only safe to do if you are **sure**
        the bucket exists.

        If the default ``validate=True`` is passed, a request is made to the
        service to ensure the bucket exists. Prior to Boto v2.25.0, this fetched
        a list of keys (but with a max limit set to ``0``, always returning an empty
        list) in the bucket (& included better error messages), at an
        increased expense. As of Boto v2.25.0, this now performs a HEAD request
        (less expensive but worse error messages).

        If you were relying on parsing the error message before, you should call
        something like::

            bucket = conn.get_bucket('<bucket_name>', validate=False)
            bucket.get_all_keys(maxkeys=0)

        :type bucket_name: string
        :param bucket_name: The name of the bucket

        :type headers: dict
        :param headers: Additional headers to pass along with the request to
            AWS.

        :type validate: boolean
        :param validate: If ``True``, it will try to verify the bucket exists
            on the service-side. (Default: ``True``)
        """
        if validate:
            return self.head_bucket(bucket_name, headers=headers)
        else:
            return self.bucket_class(self, bucket_name)

    def head_bucket(self, bucket_name, headers=None):
        """
        Determines if a bucket exists by name.

        If the bucket does not exist, an ``S3ResponseError`` will be raised.

        :type bucket_name: string
        :param bucket_name: The name of the bucket

        :type headers: dict
        :param headers: Additional headers to pass along with the request to
            AWS.

        :returns: A <Bucket> object
        """
        response = self.make_request('HEAD', bucket_name, headers=headers)
        body = response.read()
        if response.status == 200:
            return self.bucket_class(self, bucket_name)
        elif response.status == 403:
            # For backward-compatibility, we'll populate part of the exception
            # with the most-common default.  (HEAD responses carry no body,
            # so the error code/message cannot be parsed from the response.)
            err = self.provider.storage_response_error(
                response.status,
                response.reason,
                body
            )
            err.error_code = 'AccessDenied'
            err.error_message = 'Access Denied'
            raise err
        elif response.status == 404:
            # For backward-compatibility, we'll populate part of the exception
            # with the most-common default.
            err = self.provider.storage_response_error(
                response.status,
                response.reason,
                body
            )
            err.error_code = 'NoSuchBucket'
            err.error_message = 'The specified bucket does not exist'
            raise err
        else:
            raise self.provider.storage_response_error(
                response.status, response.reason, body)

    def lookup(self, bucket_name, validate=True, headers=None):
        """
        Attempts to get a bucket from S3.

        Works identically to ``S3Connection.get_bucket``, save for that it
        will return ``None`` if the bucket does not exist instead of throwing
        an exception.

        :type bucket_name: string
        :param bucket_name: The name of the bucket

        :type headers: dict
        :param headers: Additional headers to pass along with the request to
            AWS.

        :type validate: boolean
        :param validate: If ``True``, it will try to fetch all keys within the
            given bucket. (Default: ``True``)
        """
        try:
            bucket = self.get_bucket(bucket_name, validate, headers=headers)
        except self.provider.storage_response_error:
            # Catch only the provider's storage error (what head_bucket /
            # get_bucket raise for a missing or forbidden bucket).  The
            # previous bare ``except:`` also swallowed programming errors
            # and KeyboardInterrupt.
            bucket = None
        return bucket

    def create_bucket(self, bucket_name, headers=None,
                      location=Location.DEFAULT, policy=None):
        """
        Creates a new located bucket. By default it's in the USA. You can pass
        Location.EU to create a European bucket (S3) or European Union bucket
        (GCS).

        :type bucket_name: string
        :param bucket_name: The name of the new bucket

        :type headers: dict
        :param headers: Additional headers to pass along with the request to AWS.

        :type location: str
        :param location: The location of the new bucket. You can use one of the
            constants in :class:`boto.s3.connection.Location` (e.g. Location.EU,
            Location.USWest, etc.).

        :type policy: :class:`boto.s3.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the
            new key in S3.

        """
        check_lowercase_bucketname(bucket_name)

        if policy:
            if headers:
                headers[self.provider.acl_header] = policy
            else:
                headers = {self.provider.acl_header: policy}
        if location == Location.DEFAULT:
            data = ''
        else:
            data = '<CreateBucketConfiguration><LocationConstraint>' + \
                location + '</LocationConstraint></CreateBucketConfiguration>'
        response = self.make_request('PUT', bucket_name, headers=headers,
                                     data=data)
        body = response.read()
        if response.status == 409:
            raise self.provider.storage_create_error(
                response.status, response.reason, body)
        if response.status == 200:
            return self.bucket_class(self, bucket_name)
        else:
            raise self.provider.storage_response_error(
                response.status, response.reason, body)

    def delete_bucket(self, bucket, headers=None):
        """
        Removes an S3 bucket.

        In order to remove the bucket, it must first be empty. If the bucket is
        not empty, an ``S3ResponseError`` will be raised.

        :type bucket_name: string
        :param bucket_name: The name of the bucket

        :type headers: dict
        :param headers: Additional headers to pass along with the request to
            AWS.
        """
        response = self.make_request('DELETE', bucket, headers=headers)
        body = response.read()
        if response.status != 204:
            raise self.provider.storage_response_error(
                response.status, response.reason, body)

    def make_request(self, method, bucket='', key='', headers=None, data='',
                     query_args=None, sender=None, override_num_retries=None,
                     retry_handler=None):
        """
        Build the path, auth path and host for a bucket/key pair via the
        calling format, then delegate to ``AWSAuthConnection.make_request``.
        Accepts Bucket/Key objects or plain name strings.
        """
        if isinstance(bucket, self.bucket_class):
            bucket = bucket.name
        if isinstance(key, Key):
            key = key.name
        path = self.calling_format.build_path_base(bucket, key)
        boto.log.debug('path=%s' % path)
        auth_path = self.calling_format.build_auth_path(bucket, key)
        boto.log.debug('auth_path=%s' % auth_path)
        host = self.calling_format.build_host(self.server_name(), bucket)
        if query_args:
            path += '?' + query_args
            boto.log.debug('path=%s' % path)
            auth_path += '?' + query_args
            boto.log.debug('auth_path=%s' % auth_path)
        return super(S3Connection, self).make_request(
            method, path, headers,
            data, host, auth_path, sender,
            override_num_retries=override_num_retries,
            retry_handler=retry_handler
        )