• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# Copyright 2013 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Implementation of compose command for Google Cloud Storage."""
16
17from __future__ import absolute_import
18
19from gslib.bucket_listing_ref import BucketListingObject
20from gslib.command import Command
21from gslib.command_argument import CommandArgument
22from gslib.cs_api_map import ApiSelector
23from gslib.exception import CommandException
24from gslib.storage_url import ContainsWildcard
25from gslib.storage_url import StorageUrlFromString
26from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
27from gslib.translation_helper import PreconditionsFromHeaders
28
# Limit on the number of components a single composite object may have; it is
# interpolated into the help text below.
MAX_COMPONENT_COUNT = 1024
# Maximum number of source objects accepted by one compose invocation; the
# command allows this many source arguments plus one destination argument.
MAX_COMPOSE_ARITY = 32

# One-line usage string, shared by the command spec and the detailed help.
_SYNOPSIS = """
  gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
"""

# Full help text for the command. Note that '%' binds tighter than '+', so only
# the final string literal is %-formatted; the two %d placeholders receive the
# component limit and the resulting maximum number of appends (limit - 1).
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The compose command creates a new object whose content is the concatenation
  of a given sequence of component objects under the same bucket. gsutil uses
  the content type of the first source object to determine the destination
  object's content type. For more information, please see:
  https://developers.google.com/storage/docs/composite-objects

  Note also that the gsutil cp command will automatically split uploads for
  large files into multiple component objects, upload them in parallel, and
  compose them into a final object (which will be subject to the component
  count limit). This will still perform all uploads from a single machine. For
  extremely large files and/or very low per-machine bandwidth, you may want to
  split the file and upload it from multiple machines, and later compose these
  parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
  'gsutil help cp' for details.

  Appending simply entails uploading your new data to a temporary object,
  composing it with the growing append-target, and deleting the temporary
  object:

    $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
    $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
        gs://bucket/append-target
    $ gsutil rm gs://bucket/data-to-append

  Note that there is a limit (currently %d) to the number of components for a
  given composite object. This means you can append to each object at most %d
  times.
""" % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
70
71
class ComposeCommand(Command):
  """Implementation of gsutil compose command.

  Concatenates a sequence of source objects, all in the same bucket as the
  destination, into a single destination object. The last command-line
  argument is the destination; all preceding arguments are sources.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      # Up to MAX_COMPOSE_ARITY source arguments plus the destination.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Not files, just object names without gs:// prefix.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Verifies that the given URL uses the 'gs' scheme.

    Args:
      url: Storage URL object to check; only its scheme is inspected.

    Raises:
      CommandException: if the URL's scheme is anything other than 'gs'.
    """
    if url.scheme != 'gs':
      raise CommandException(
          '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command.

    Treats the last argument as the destination object and every preceding
    argument as a source URL (wildcards are expanded), then issues a single
    compose request. The destination's content type is copied from the first
    source object.

    Raises:
      CommandException: if the destination is a version-specific URL, if any
          URL is not a gs:// URL, if a source lives in a different bucket than
          the destination, or if the expanded sources number fewer than 2 or
          more than MAX_COMPOSE_ARITY.
    """
    # The final argument is the destination; everything before it is a source.
    target_url_str = self.args[-1]
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in self.args:
      if ContainsWildcard(src_url_str):
        src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
      else:
        src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
      for blr in src_url_iter:
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if first_src_url is None:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        if src_url.HasGeneration():
          src_obj_metadata.generation = src_url.generation
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result. Checking inside the loop stops a large wildcard
        # expansion as soon as the limit is exceeded.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if len(components) < 2:
      raise CommandException('"compose" requires at least 2 component objects.')

    # Look up the content type on the same version of the first source that
    # will actually be composed. Without the generation, a version-specific
    # first source would have its content type read from the live version
    # (or fail if only the archived version still exists).
    first_src_generation = None
    if first_src_url.HasGeneration():
      first_src_generation = first_src_url.generation
    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        generation=first_src_generation,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    # Request headers (if any) are translated into API preconditions.
    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component objects.', target_url, len(components))
    self.gsutil_api.ComposeObject(components, dst_obj_metadata,
                                  preconditions=preconditions,
                                  provider=target_url.scheme)
168