1 /* 2 * Copyright (C) 2024 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.tradefed.util.gcs; 18 19 import com.android.tradefed.log.LogUtil.CLog; 20 import com.android.tradefed.util.FileUtil; 21 import com.android.tradefed.util.StreamUtil; 22 23 import com.google.api.client.googleapis.json.GoogleJsonResponseException; 24 import com.google.api.services.storage.Storage; 25 import com.google.api.services.storage.model.Objects; 26 import com.google.api.services.storage.model.StorageObject; 27 import com.google.common.annotations.VisibleForTesting; 28 29 import java.io.ByteArrayInputStream; 30 import java.io.ByteArrayOutputStream; 31 import java.io.File; 32 import java.io.FileOutputStream; 33 import java.io.IOException; 34 import java.io.InputStream; 35 import java.io.OutputStream; 36 import java.math.BigInteger; 37 import java.net.SocketException; 38 import java.net.SocketTimeoutException; 39 import java.nio.file.Paths; 40 import java.util.ArrayList; 41 import java.util.Arrays; 42 import java.util.Collection; 43 import java.util.Collections; 44 import java.util.HashSet; 45 import java.util.List; 46 import java.util.Set; 47 import java.util.regex.Matcher; 48 import java.util.regex.Pattern; 49 50 /** File downloader to download file from google cloud storage (GCS). */ 51 public class GCSFileDownloaderBase extends GCSCommon { 52 public static final String GCS_PREFIX = "gs://"; 53 public static final String GCS_APPROX_PREFIX = "gs:/"; 54 55 private static final Pattern GCS_PATH_PATTERN = Pattern.compile("gs://([^/]*)/(.*)"); 56 private static final String PATH_SEP = "/"; 57 private static final Collection<String> SCOPES = 58 Collections.singleton("https://www.googleapis.com/auth/devstorage.read_only"); 59 private static final long LIST_BATCH_SIZE = 100; 60 61 // Allow downloader to create empty files instead of throwing exception. 62 protected Boolean mCreateEmptyFile = false; 63 GCSFileDownloaderBase(Boolean createEmptyFile)64 public GCSFileDownloaderBase(Boolean createEmptyFile) { 65 mCreateEmptyFile = createEmptyFile; 66 } 67 GCSFileDownloaderBase()68 public GCSFileDownloaderBase() { 69 this(false); 70 } 71 getStorage()72 protected Storage getStorage() throws IOException { 73 return getStorage(SCOPES); 74 } 75 getRemoteFileMetaData(String bucketName, String remoteFilename)76 public StorageObject getRemoteFileMetaData(String bucketName, String remoteFilename) 77 throws IOException { 78 int i = 0; 79 do { 80 i++; 81 try { 82 return getStorage().objects().get(bucketName, remoteFilename).execute(); 83 } catch (GoogleJsonResponseException e) { 84 if (e.getStatusCode() == 404) { 85 return null; 86 } 87 throw e; 88 } catch (SocketTimeoutException e) { 89 // Allow one retry in case of flaky connection. 90 if (i >= 2) { 91 throw e; 92 } 93 } 94 } while (true); 95 } 96 downloadFile(String remoteFilePath)97 public File downloadFile(String remoteFilePath) throws Exception { 98 File destFile = createTempFile(remoteFilePath, null); 99 try { 100 downloadFile(remoteFilePath, destFile); 101 return destFile; 102 } catch (IOException e) { 103 FileUtil.recursiveDelete(destFile); 104 throw e; 105 } 106 } 107 108 /** 109 * Download a file from a GCS bucket file. 110 * 111 * @param bucketName GCS bucket name 112 * @param filename the filename 113 * @return {@link InputStream} with the file content. 114 */ downloadFile(String bucketName, String filename)115 public InputStream downloadFile(String bucketName, String filename) throws IOException { 116 InputStream remoteInput = null; 117 ByteArrayOutputStream tmpStream = null; 118 try { 119 remoteInput = 120 getStorage().objects().get(bucketName, filename).executeMediaAsInputStream(); 121 // The input stream from api call can not be reset. Change it to ByteArrayInputStream. 122 tmpStream = new ByteArrayOutputStream(); 123 StreamUtil.copyStreams(remoteInput, tmpStream); 124 return new ByteArrayInputStream(tmpStream.toByteArray()); 125 } finally { 126 StreamUtil.close(remoteInput); 127 StreamUtil.close(tmpStream); 128 } 129 } 130 downloadFile(String remotePath, File destFile)131 public void downloadFile(String remotePath, File destFile) throws Exception { 132 String[] pathParts = parseGcsPath(remotePath); 133 downloadFile(pathParts[0], pathParts[1], destFile); 134 } 135 136 @VisibleForTesting downloadFile(String bucketName, String remoteFilename, File localFile)137 protected void downloadFile(String bucketName, String remoteFilename, File localFile) 138 throws Exception { 139 int i = 0; 140 try { 141 do { 142 i++; 143 try { 144 if (!isRemoteFolder(bucketName, remoteFilename)) { 145 fetchRemoteFile(bucketName, remoteFilename, localFile); 146 return; 147 } 148 remoteFilename = sanitizeDirectoryName(remoteFilename); 149 recursiveDownloadFolder(bucketName, remoteFilename, localFile); 150 return; 151 } catch (IOException e) { 152 // Allow one retry in case of flaky connection. 153 if (i >= 2) { 154 throw e; 155 } 156 // Allow `Read timed out` exception to be retried. 157 if (!(e instanceof SocketException) 158 && !"Read timed out".equals(e.getMessage())) { 159 throw e; 160 } 161 CLog.e( 162 "Error '%s' while downloading gs://%s/%s. retrying.", 163 e.getMessage(), bucketName, remoteFilename); 164 CLog.e(e); 165 } 166 } while (true); 167 } catch (IOException e) { 168 String message = 169 String.format( 170 "Failed to download gs://%s/%s due to: %s", 171 bucketName, remoteFilename, e.getMessage()); 172 CLog.e(message); 173 CLog.e(e); 174 throw new IOException(message, e); 175 } 176 } 177 listRemoteFilesUnderFolder( String bucketName, String folder, List<StorageObject> subFiles, List<String> subFolders)178 protected void listRemoteFilesUnderFolder( 179 String bucketName, String folder, List<StorageObject> subFiles, List<String> subFolders) 180 throws IOException { 181 String pageToken = null; 182 while (true) { 183 com.google.api.services.storage.Storage.Objects.List listOperation = 184 getStorage() 185 .objects() 186 .list(bucketName) 187 .setPrefix(folder) 188 .setDelimiter(PATH_SEP) 189 .setMaxResults(LIST_BATCH_SIZE); 190 if (pageToken != null) { 191 listOperation.setPageToken(pageToken); 192 } 193 Objects objects = listOperation.execute(); 194 if (objects.getItems() != null && !objects.getItems().isEmpty()) { 195 for (int i = 0; i < objects.getItems().size(); i++) { 196 if (objects.getItems().get(i).getName().equals(folder)) { 197 // If the folder is created from UI, the folder itself 198 // is a size 0 text file and its name will be 199 // the folder's name, we should ignore this file. 200 continue; 201 } 202 subFiles.add(objects.getItems().get(i)); 203 } 204 } 205 if (objects.getPrefixes() != null && !objects.getPrefixes().isEmpty()) { 206 // size 0 sub-folders will also be listed under the prefix. 207 // So this includes all the sub-folders. 208 subFolders.addAll(objects.getPrefixes()); 209 } 210 pageToken = objects.getNextPageToken(); 211 if (pageToken == null) { 212 return; 213 } 214 } 215 } 216 parseGcsPath(String remotePath)217 protected String[] parseGcsPath(String remotePath) throws Exception { 218 if (remotePath.startsWith(GCS_APPROX_PREFIX) && !remotePath.startsWith(GCS_PREFIX)) { 219 // File object remove double // so we have to rebuild it in some cases 220 remotePath = remotePath.replaceAll(GCS_APPROX_PREFIX, GCS_PREFIX); 221 } 222 Matcher m = GCS_PATH_PATTERN.matcher(remotePath); 223 if (!m.find()) { 224 throw new IOException( 225 String.format("Only GCS path is supported, %s is not supported", remotePath)); 226 } 227 return new String[] {m.group(1), m.group(2)}; 228 } 229 sanitizeDirectoryName(String name)230 public String sanitizeDirectoryName(String name) { 231 /** Folder name should end with "/" */ 232 if (!name.endsWith(PATH_SEP)) { 233 name += PATH_SEP; 234 } 235 return name; 236 } 237 238 /** 239 * Check given filename is a folder or not. 240 * 241 * <p>There 2 types of folders in gcs: 1. Created explicitly from UI. The folder is a size 0 242 * text file (it's an object). 2. When upload a file, all its parent folders will be created, 243 * but these folders doesn't exist (not objects) in gcs. This function work for both cases. But 244 * we should not try to download the size 0 folders. 245 * 246 * @param bucketName is the gcs bucket name. 247 * @param filename is the relative path to the bucket. 248 * @return true if the filename is a folder, otherwise false. 249 */ isRemoteFolder(String bucketName, String filename)250 public boolean isRemoteFolder(String bucketName, String filename) throws IOException { 251 filename = sanitizeDirectoryName(filename); 252 Objects objects = 253 getStorage() 254 .objects() 255 .list(bucketName) 256 .setPrefix(filename) 257 .setDelimiter(PATH_SEP) 258 .setMaxResults(1L) 259 .execute(); 260 if (objects.getItems() != null && !objects.getItems().isEmpty()) { 261 // The filename is end with '/', if there are objects use filename as prefix 262 // then filename must be a folder. 263 return true; 264 } 265 if (objects.getPrefixes() != null && !objects.getPrefixes().isEmpty()) { 266 // This will happen when the folder only contains folders but no objects. 267 // objects.getItems() will be empty, but objects.getPrefixes will list 268 // sub-folders. 269 return true; 270 } 271 return false; 272 } 273 fetchRemoteFile(String bucketName, String remoteFilename, File localFile)274 void fetchRemoteFile(String bucketName, String remoteFilename, File localFile) 275 throws IOException { 276 CLog.d("Fetching gs://%s/%s to %s.", bucketName, remoteFilename, localFile.toString()); 277 StorageObject meta = getRemoteFileMetaData(bucketName, remoteFilename); 278 if (meta == null || meta.getSize().equals(BigInteger.ZERO)) { 279 if (!mCreateEmptyFile) { 280 throw new IOException( 281 String.format( 282 "File (not folder) gs://%s/%s doesn't exist or is size 0.", 283 bucketName, remoteFilename)); 284 } else { 285 // Create the empty file. 286 CLog.d("GCS file is empty: gs://%s/%s", bucketName, remoteFilename); 287 localFile.createNewFile(); 288 return; 289 } 290 } 291 try (OutputStream writeStream = new FileOutputStream(localFile)) { 292 getStorage() 293 .objects() 294 .get(bucketName, remoteFilename) 295 .executeMediaAndDownloadTo(writeStream); 296 } 297 } 298 299 /** 300 * Recursively download remote folder to local folder. 301 * 302 * @param bucketName the gcs bucket name 303 * @param remoteFolderName remote folder name, must end with "/" 304 * @param localFolder local folder 305 * @throws IOException 306 */ recursiveDownloadFolder( String bucketName, String remoteFolderName, File localFolder)307 private void recursiveDownloadFolder( 308 String bucketName, String remoteFolderName, File localFolder) throws IOException { 309 CLog.d("Downloading folder gs://%s/%s.", bucketName, remoteFolderName); 310 if (!localFolder.exists()) { 311 FileUtil.mkdirsRWX(localFolder); 312 } 313 if (!localFolder.isDirectory()) { 314 String error = 315 String.format( 316 "%s is not a folder. (gs://%s/%s)", 317 localFolder, bucketName, remoteFolderName); 318 CLog.e(error); 319 throw new IOException(error); 320 } 321 Set<String> subFilenames = new HashSet<>(Arrays.asList(localFolder.list())); 322 List<String> subRemoteFolders = new ArrayList<>(); 323 List<StorageObject> subRemoteFiles = new ArrayList<>(); 324 listRemoteFilesUnderFolder(bucketName, remoteFolderName, subRemoteFiles, subRemoteFolders); 325 for (StorageObject subRemoteFile : subRemoteFiles) { 326 String subFilename = Paths.get(subRemoteFile.getName()).getFileName().toString(); 327 fetchRemoteFile( 328 bucketName, subRemoteFile.getName(), new File(localFolder, subFilename)); 329 subFilenames.remove(subFilename); 330 } 331 for (String subRemoteFolder : subRemoteFolders) { 332 String subFolderName = Paths.get(subRemoteFolder).getFileName().toString(); 333 File subFolder = new File(localFolder, subFolderName); 334 if (new File(localFolder, subFolderName).exists() 335 && !new File(localFolder, subFolderName).isDirectory()) { 336 CLog.w("%s exists as a non-directory.", subFolder); 337 subFolder = new File(localFolder, subFolderName + "_folder"); 338 } 339 recursiveDownloadFolder(bucketName, subRemoteFolder, subFolder); 340 subFilenames.remove(subFolder.getName()); 341 } 342 for (String subFilename : subFilenames) { 343 FileUtil.recursiveDelete(new File(localFolder, subFilename)); 344 } 345 } 346 347 @VisibleForTesting createTempFile(String remoteFilePath, File rootDir)348 protected File createTempFile(String remoteFilePath, File rootDir) throws Exception { 349 return createTempFileForRemote(remoteFilePath, rootDir); 350 } 351 352 /** 353 * Creates a unique file on temporary disk to house downloaded file with given path. 354 * 355 * <p>Constructs the file name based on base file name from path 356 * 357 * @param remoteFilePath the remote path to construct the name from 358 */ createTempFileForRemote(String remoteFilePath, File rootDir)359 public static File createTempFileForRemote(String remoteFilePath, File rootDir) 360 throws Exception { 361 // create a unique file. 362 File tmpFile = FileUtil.createTempFileForRemote(remoteFilePath, rootDir); 363 // now delete it so name is available 364 tmpFile.delete(); 365 return tmpFile; 366 } 367 } 368