# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of compose command for Google Cloud Storage."""

from __future__ import absolute_import

from gslib.bucket_listing_ref import BucketListingObject
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.storage_url import ContainsWildcard
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import PreconditionsFromHeaders

# Upper bound on the total number of components in one composite object.
MAX_COMPONENT_COUNT = 1024
# Upper bound on the number of source objects in a single compose call.
MAX_COMPOSE_ARITY = 32

_SYNOPSIS = """
  gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
"""

_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The compose command creates a new object whose content is the concatenation
  of a given sequence of component objects under the same bucket. gsutil uses
  the content type of the first source object to determine the destination
  object's content type. For more information, please see:
  https://developers.google.com/storage/docs/composite-objects

  Note also that the gsutil cp command will automatically split uploads for
  large files into multiple component objects, upload them in parallel, and
  compose them into a final object (which will be subject to the component
  count limit). This will still perform all uploads from a single machine. For
  extremely large files and/or very low per-machine bandwidth, you may want to
  split the file and upload it from multiple machines, and later compose these
  parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
  'gsutil help cp' for details.

  Appending simply entails uploading your new data to a temporary object,
  composing it with the growing append-target, and deleting the temporary
  object:

    $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
    $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
        gs://bucket/append-target
    $ gsutil rm gs://bucket/data-to-append

  Note that there is a limit (currently %d) to the number of components for a
  given composite object. This means you can append to each object at most %d
  times.
""" % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))


class ComposeCommand(Command):
  """Implementation of gsutil compose command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      # Up to MAX_COMPOSE_ARITY sources plus the one destination URL.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Not files, just object names without gs:// prefix.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Rejects any URL whose scheme is not 'gs'.

    Args:
      url: URL object exposing a `scheme` attribute.

    Raises:
      CommandException: if url.scheme is anything other than 'gs'.
    """
    if url.scheme == 'gs':
      return
    raise CommandException(
        '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command.

    The last element of self.args is the destination URL; every preceding
    element is a source URL string (wildcards are expanded). The destination
    object's content type is copied from the first source object.

    Raises:
      CommandException: if any URL is not a 'gs' URL, the destination URL is
          version-specific, a source is in a different bucket than the
          destination, more than MAX_COMPOSE_ARITY components result from
          expansion, or fewer than 2 components are supplied.
    """
    # Peel the destination off the end; self.args keeps only the sources.
    self.args, destination_str = self.args[:-1], self.args[-1]
    destination = StorageUrlFromString(destination_str)
    self.CheckProvider(destination)
    if destination.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % destination)

    composite_metadata = apitools_messages.Object(
        name=destination.object_name, bucket=destination.bucket_name)

    source_entries = []
    # The first source seen supplies the composite's content type.
    content_type_source = None
    for source_str in self.args:
      listing = (
          self.WildcardIterator(source_str).IterObjects()
          if ContainsWildcard(source_str)
          else [BucketListingObject(StorageUrlFromString(source_str))])
      for listing_ref in listing:
        source = listing_ref.storage_url
        self.CheckProvider(source)

        if source.bucket_name != destination.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        content_type_source = content_type_source or source

        entry = apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
            name=source.object_name)
        if source.HasGeneration():
          entry.generation = source.generation
        source_entries.append(entry)

        # Checked per expansion result so a broad wildcard fails fast instead
        # of being fully enumerated first.
        if len(source_entries) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if len(source_entries) < 2:
      raise CommandException('"compose" requires at least 2 component objects.')

    first_source_metadata = self.gsutil_api.GetObjectMetadata(
        content_type_source.bucket_name, content_type_source.object_name,
        provider=content_type_source.scheme, fields=['contentType'])
    composite_metadata.contentType = first_source_metadata.contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component objects.',
        destination, len(source_entries))
    self.gsutil_api.ComposeObject(source_entries, composite_metadata,
                                  preconditions=preconditions,
                                  provider=destination.scheme)