#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""

# System-level imports
import os
import posixpath
import sys
try:
    from apiclient.discovery import build as build_service
except ImportError:
    print ('Missing google-api-python-client.  Please install it; directions '
           'can be found at https://developers.google.com/api-client-library/'
           'python/start/installation')
    raise

# Local imports
import url_utils


def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """ Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket; any leading
          slashes are ignored
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    # posixpath.join() throws away every earlier component when a later one
    # is absolute, so a source_path such as '/dir/file' would otherwise
    # produce a URL with neither host nor bucket in it.
    relative_path = source_path.lstrip('/')
    source_http_url = posixpath.join(
        'http://storage.googleapis.com', source_bucket, relative_path)
    # The actual transfer (and optional directory creation) is delegated to
    # url_utils.copy_contents.
    url_utils.copy_contents(
        source_url=source_http_url, dest_path=dest_path,
        create_subdirs_if_needed=create_subdirs_if_needed)


def list_bucket_contents(bucket, subdir=None):
    """ Returns files in the Google Cloud Storage bucket as a (dirs, files)
    tuple.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    All result pages are fetched, so the listing is complete even when the
    directory holds more entries than the service returns in one response.

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      (dirs, files) tuple: two lists of names, each relative to subdir.
      Directory names are returned without their trailing slash.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    objects = storage.objects()
    # nextPageToken must be part of the requested fields; without it
    # list_next() cannot tell that further pages exist.
    request = objects.list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken', prefix=subdir)

    dirs = []
    files = []
    while request is not None:
        results = request.execute()

        # Each page of the GCS response holds two subdicts:
        #   prefixes: the full path of every directory within subdir, with
        #             trailing '/'
        #   items: property dict for each file object within subdir
        #          (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            dir_basename = dir_fullpath[subdir_length:]
            dirs.append(dir_basename[:-1])  # strip trailing slash
        for file_properties in results.get('items', []):
            file_fullpath = file_properties['name']
            files.append(file_fullpath[subdir_length:])

        # list_next() returns None once the last page has been consumed.
        request = objects.list_next(
            previous_request=request, previous_response=results)
    return (dirs, files)