# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility module for HTTP."""
import json
import logging
import os
import sys
import tempfile
import zipfile

import requests

# The grandparent directory of this file is appended to sys.path before
# importing |retry| (presumably that is where the retry module lives).
# pylint: disable=wrong-import-position,import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import retry

# Arguments handed to retry.wrap for _download_url.
_DOWNLOAD_URL_RETRIES = 3
_DOWNLOAD_URL_BACKOFF = 1


def download_and_unpack_zip(url, extract_directory, headers=None):
  """Downloads and unpacks a zip file from an HTTP URL.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    extract_directory: The path where the zip file should be extracted to.
      Must already exist.
    headers: (Optional) HTTP headers to send with the download request.

  Returns:
    True on success. False if |extract_directory| does not exist, the
    download fails, or the downloaded file is not a valid zip archive.
  """
  if headers is None:
    headers = {}

  if not os.path.exists(extract_directory):
    logging.error('Extract directory: %s does not exist.', extract_directory)
    return False

  # Gives the temporary zip file a unique identifier in the case that
  # download_and_unpack_zip is done in parallel.
  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
    if not download_url(url, tmp_file.name, headers=headers):
      return False

    try:
      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
        zip_file.extractall(extract_directory)
    except zipfile.BadZipFile:
      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      return False

  return True


def download_url(*args, **kwargs):
  """Wrapper around _download_url that returns False instead of raising.

  Args:
    *args: Positional arguments forwarded to _download_url.
    **kwargs: Keyword arguments forwarded to _download_url.

  Returns:
    The result of _download_url, or False if _download_url raised an
    exception.
  """
  try:
    return _download_url(*args, **kwargs)
  except Exception:  # pylint: disable=broad-except
    # Deliberately best-effort, but leave a trace so failures are diagnosable.
    # Arguments are not logged because headers may carry credentials.
    logging.exception('Download failed with an exception.')
    return False


def get_json_from_url(url):
  """Gets a json object from a specified HTTP URL.

  Args:
    url: The url of the json to be downloaded.

  Returns:
    A dictionary deserialized from JSON or None on failure (either the
    request itself failed or the response body was not valid JSON).
  """
  try:
    response = requests.get(url)
  except requests.exceptions.RequestException as err:
    # Connection/DNS/redirect errors must not escape: callers are promised
    # None on failure.
    logging.error('Requesting json from url %s failed with: %s.', url, err)
    return None

  try:
    return response.json()
  except (ValueError, TypeError, json.JSONDecodeError) as err:
    logging.error('Loading json from url %s failed with: %s.', url, str(err))
    return None


@retry.wrap(_DOWNLOAD_URL_RETRIES, _DOWNLOAD_URL_BACKOFF)
def _download_url(url, filename, headers=None):
  """Downloads the file located at |url|, using HTTP to |filename|.

  Args:
    url: A url to a file to download.
    filename: The path the file should be downloaded to.
    headers: (Optional) HTTP headers to send with the download request.

  Returns:
    True on success. False if the server responds with a status code other
    than 200.
  """
  if headers is None:
    headers = {}

  response = requests.get(url, headers=headers)

  if response.status_code != 200:
    logging.error('Unable to download from: %s. Code: %d. Content: %s.', url,
                  response.status_code, response.content)
    return False

  # The whole response body is held in memory before being written out.
  with open(filename, 'wb') as file_handle:
    file_handle.write(response.content)

  return True