• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# Copyright 2014 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Utility functions and class for listing commands such as ls and du."""
16
17from __future__ import absolute_import
18
19import fnmatch
20
21from gslib.exception import CommandException
22from gslib.plurality_checkable_iterator import PluralityCheckableIterator
23from gslib.util import UTF8
24from gslib.wildcard_iterator import StorageUrlFromString
25
26
def PrintNewLine():
  """Default function for printing new lines between directories.

  Writes exactly one newline character to standard output.
  """
  # A bare `print` only emits a newline when it is parsed as a Python 2
  # statement; under Python 3 it merely evaluates the function object and
  # prints nothing. Writing to stdout directly behaves the same on both.
  import sys  # Local import: this module does not import sys at file level.
  sys.stdout.write('\n')
30
31
def PrintDirHeader(bucket_listing_ref):
  """Prints the header line shown before a prefix's contents are listed.

  The header is the reference's URL string, encoded with the UTF8 codec
  constant, followed by a colon.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  header_line = '%s:' % encoded_url
  # Parenthesized single-argument form prints identically as a statement.
  print(header_line)
41
42
def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default hook called before a bucket's contents are listed.

  This default implementation intentionally prints nothing; it exists so
  callers can substitute their own header printer.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET (ignored here).

  Returns:
    None.
  """
  return None
52
53
def PrintDir(bucket_listing_ref):
  """Prints a bucket or prefix as a single line.

  The line is the reference's URL string encoded with the UTF8 codec constant.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  # Parenthesized single-argument form prints identically as a statement.
  print(encoded_url)
61
62
63# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Default hook for bucket/prefix size summaries; prints nothing.

  Summaries are off by default, so this implementation ignores both arguments.
  Callers that want summaries supply their own function with this signature.

  Args:
    num_bytes: Number of bytes contained in the directory (ignored here).
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX (ignored
                        here).

  Returns:
    None.
  """
  return None
72
73
def PrintObject(bucket_listing_ref):
  """Prints an object as a single line and reports what was counted.

  The line is the reference's URL string encoded with the UTF8 codec constant.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes), which is always (1, 0) for this default printer:
    one object is counted and no size is reported.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  # Parenthesized single-argument form prints identically as a statement.
  print(encoded_url)
  object_count, byte_count = 1, 0
  return (object_count, byte_count)
85
86
class LsHelper(object):
  """Helper class for ls and du.

  Walks the listing produced by an iterator factory and reports each bucket,
  prefix and object through caller-supplied print callbacks, while tallying
  directory, object and byte counts.
  """

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator.
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                           versions.
                     The returned iterator must provide
                     IterAll(expand_top_level_buckets=..., 
                     bucket_listing_fields=...).
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Called with the
                         object's BucketListingRef; must return
                         (num_objects, num_bytes).
      print_dir_func:    Function for printing buckets/prefixes. Called with
                         a prefix BucketListingRef when not recursing.
      print_dir_header_func: Function for printing header line for prefixes.
                             Called with the prefix BucketListingRef before
                             its contents are listed.
      print_bucket_header_func: Function for printing header line for
                                buckets. Called with the bucket URL before
                                its contents are listed.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes. Called with
                              (num_bytes, bucket_listing_ref) after a prefix
                              has been expanded.
      print_newline_func: Function for printing new lines between dirs.
                          Called with no arguments.
      all_versions:      If true, list all object versions.
      should_recurse:    If true, recursively listing buckets/prefixes.
      exclude_patterns:  Patterns to exclude when listing. Matched against
                         each reference's url_string with fnmatch.
      fields:            Fields to request from bucket listings; this should
                         include all fields that need to be populated in
                         objects so they can be listed. Can be set to None
                         to retrieve all object fields. Defaults to short
                         listing fields.
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated.

    Raises:
      CommandException: if the listing yields a reference that is neither an
                        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    top_level_iterator = PluralityCheckableIterator(self._iterator_func(
        url.CreatePrefixUrl(wildcard_suffix=None),
        all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    # Prefix headers are only printed when more than one result matched.
    plurality = top_level_iterator.HasPlurality()

    num_dirs = 0
    num_objects = 0
    num_bytes = 0
    # Becomes True once anything has been printed, so that every prefix after
    # the first printed item is separated from it by a newline.
    print_newline = False

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
        print_newline = True
      elif blr.IsPrefix():
        if print_newline:
          self._print_newline_func()
        print_newline = True
        if plurality:
          self._print_dir_header_func(blr)
        nd, no, nb = self._ExpandPrefixAndPrintSummary(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb
    return num_dirs, num_objects, num_bytes

  def _ExpandPrefixAndPrintSummary(self, blr):
    """Expands a prefix with a trailing wildcard, then prints its summary.

    Args:
      blr: BucketListingRef of type PREFIX to expand.

    Returns:
      (num_dirs, num_objects, num_bytes) iterated beneath the prefix.
    """
    expansion_url_str = StorageUrlFromString(
        blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
    nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
    # The summary is printed only after the prefix's contents are listed.
    self._print_dir_summary_func(nb, blr)
    return nd, no, nb

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated.

    Raises:
      CommandException: if the listing yields a reference that is neither an
                        object nor a prefix.
    """
    num_dirs = 0
    num_objects = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          # Only the first expanded prefix may skip its leading newline; every
          # later one at this level is always preceded by one.
          if print_initial_newline:
            self._print_newline_func()
          print_initial_newline = True
          self._print_dir_header_func(blr)
          nd, no, nb = self._ExpandPrefixAndPrintSummary(blr)
        else:
          # Not recursing: the prefix counts as one directory and is printed
          # as a single entry without being expanded.
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded.
    """
    # No patterns configured (None or empty) means nothing is excluded.
    if not self.exclude_patterns:
      return False
    tomatch = blr.url_string
    return any(fnmatch.fnmatch(tomatch, pattern)
               for pattern in self.exclude_patterns)
262