• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2
3"""
4Copyright 2014 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Utilities for accessing Google Cloud Storage.
10
11TODO(epoger): move this into tools/utils for broader use?
12"""
13
14# System-level imports
15import os
16import posixpath
17import sys
18try:
19  from apiclient.discovery import build as build_service
20except ImportError:
21  print ('Missing google-api-python-client.  Please install it; directions '
22         'can be found at https://developers.google.com/api-client-library/'
23         'python/start/installation')
24  raise
25
26# Local imports
27import url_utils
28
29
def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
  """ Downloads a single file from Google Cloud Storage to local disk.

  The object is fetched through its http://storage.googleapis.com URL, and
  the actual transfer is delegated to url_utils.copy_contents().

  Args:
    source_bucket: GCS bucket to download the file from
    source_path: full path (Posix-style) within that bucket; a leading slash
        is accepted and ignored
    dest_path: full path (local-OS-style) on local disk to copy the file to
    create_subdirs_if_needed: boolean; whether to create subdirectories as
        needed to create dest_path (passed through to
        url_utils.copy_contents)
  """
  # posixpath.join() discards every earlier component as soon as it meets one
  # that begins with '/', so a bucket or path written with a leading slash
  # would silently drop the host name from the URL.  Trim those separators
  # before joining.
  source_http_url = posixpath.join(
      'http://storage.googleapis.com',
      source_bucket.strip('/'),
      source_path.lstrip('/'))
  url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                          create_subdirs_if_needed=create_subdirs_if_needed)
45
46
def list_bucket_contents(bucket, subdir=None):
  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

  Uses the API documented at
  https://developers.google.com/storage/docs/json_api/v1/objects/list

  Every page of the listing is fetched, so directories holding more entries
  than fit in a single API response are still returned in full.

  Args:
    bucket: name of the Google Storage bucket
    subdir: directory within the bucket to list, or None for root directory

  Returns:
    (dirs, files) tuple: dirs is a list of directory names (no trailing
    slash) and files is a list of file names, both relative to subdir.
  """
  # The GCS command relies on the subdir name (if any) ending with a slash.
  if subdir and not subdir.endswith('/'):
    subdir += '/'
  # Number of leading characters to drop to turn a full path into a basename.
  subdir_length = len(subdir) if subdir else 0

  storage = build_service('storage', 'v1')
  objects = storage.objects()
  # nextPageToken must be listed in 'fields'; otherwise the response omits it
  # and there is no way to tell that more pages remain.
  request = objects.list(
      bucket=bucket, delimiter='/',
      fields='items(name),prefixes,nextPageToken',
      prefix=subdir)

  dirs = []
  files = []
  # list_next() returns None once the last page has been consumed.
  while request is not None:
    results = request.execute()

    # Each page of results holds two (optional) subdicts:
    # prefixes: the full path of every directory within subdir, with
    #           trailing '/'
    # items: property dict for each file object within subdir
    #        (including 'name', which is full path of the object)
    for dir_fullpath in results.get('prefixes', []):
      dir_basename = dir_fullpath[subdir_length:]
      dirs.append(dir_basename[:-1])  # strip trailing slash
    for file_properties in results.get('items', []):
      file_fullpath = file_properties['name']
      files.append(file_fullpath[subdir_length:])

    request = objects.list_next(request, results)
  return (dirs, files)
82