• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Utility module for HTTP."""
15import json
16import logging
17import os
18import sys
19import tempfile
20import zipfile
21
22import requests
23
24# pylint: disable=wrong-import-position,import-error
25sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
26import retry
27
28_DOWNLOAD_URL_RETRIES = 3
29_DOWNLOAD_URL_BACKOFF = 1
30
31
def download_and_unpack_zip(url, extract_directory, headers=None):
  """Downloads and unpacks a zip file from an HTTP URL.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    extract_directory: The path of an existing directory that the zip file
      should be extracted to.
    headers: (Optional) HTTP headers to send with the download request.

  Returns:
    True on success. False if |extract_directory| is not an existing
    directory, if the download fails, or if the downloaded file is not a
    valid zip file.
  """
  if headers is None:
    headers = {}

  # Use isdir rather than exists: a regular file at this path cannot be
  # extracted into, so reject it before spending time on the download.
  if not os.path.isdir(extract_directory):
    logging.error('Extract directory: %s does not exist.', extract_directory)
    return False

  # Gives the temporary zip file a unique identifier in the case that
  # download_and_unpack_zip is done in parallel.
  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
    if not download_url(url, tmp_file.name, headers=headers):
      return False

    try:
      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
        zip_file.extractall(extract_directory)
    except zipfile.BadZipFile:
      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      return False

  return True
64
65
def download_url(*args, **kwargs):
  """Wrapper around _download_url that returns False instead of raising.

  Args:
    *args: Positional arguments forwarded unchanged to _download_url.
    **kwargs: Keyword arguments forwarded unchanged to _download_url.

  Returns:
    The return value of _download_url, or False if _download_url raised an
    exception.
  """
  try:
    return _download_url(*args, **kwargs)
  except Exception as err:  # pylint: disable=broad-except
    # Callers only see a boolean, so record why the download failed before
    # the exception is dropped. Only the exception is logged: the forwarded
    # arguments may include request headers.
    logging.error('Download failed with: %s.', err)
    return False
73
74
def get_json_from_url(url):
  """Gets a json object from a specified HTTP URL.

  Args:
    url: The url of the json to be downloaded.

  Returns:
    A dictionary deserialized from JSON or None on failure. Failure covers
    both an HTTP request that raises and a response body that cannot be
    decoded as JSON.
  """
  try:
    # The request itself is inside the try block so that request errors are
    # reported as None, like decode errors, instead of propagating.
    response = requests.get(url)
    return response.json()
  except (ValueError, TypeError, json.JSONDecodeError,
          requests.exceptions.RequestException) as err:
    logging.error('Loading json from url %s failed with: %s.', url, str(err))
    return None
90
91
@retry.wrap(_DOWNLOAD_URL_RETRIES, _DOWNLOAD_URL_BACKOFF)
def _download_url(url, filename, headers=None):
  """Fetches |url| over HTTP and saves the response body to |filename|.

  Args:
    url: The url of the file to fetch.
    filename: Path of the local file the response body is written to.
    headers: (Optional) HTTP headers sent along with the request.

  Returns:
    True if the response status code was 200 and the body was written to
    |filename|, False otherwise.
  """
  request_headers = {} if headers is None else headers
  response = requests.get(url, headers=request_headers)

  if response.status_code == 200:
    # The whole response body is written out in a single call.
    with open(filename, 'wb') as output_file:
      output_file.write(response.content)
    return True

  logging.error('Unable to download from: %s. Code: %d. Content: %s.', url,
                response.status_code, response.content)
  return False
118