#!/usr/bin/env python
#
# Copyright 2015 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""HTTP wrapper for apitools.

This library wraps the underlying http library we use, which is
currently httplib2.
"""

import collections
import contextlib
import logging
import socket
import time

import httplib2
import six
from six.moves import http_client
from six.moves.urllib import parse

from apitools.base.py import exceptions
from apitools.base.py import util

# The refresh-error class is imported under whichever of the two names the
# installed oauth2client provides.
# pylint: disable=ungrouped-imports
try:
    from oauth2client.client import HttpAccessTokenRefreshError as TokenRefreshError  # noqa
except ImportError:
    from oauth2client.client import AccessTokenRefreshError as TokenRefreshError  # noqa

__all__ = [
    'CheckResponse',
    'GetHttp',
    'HandleExceptionsAndRebuildHttpConnections',
    'MakeRequest',
    'RebuildHttpConnections',
    'Request',
    'Response',
    'RethrowExceptionHandler',
]


# 308 and 429 don't have names in httplib.
RESUME_INCOMPLETE = 308
TOO_MANY_REQUESTS = 429

# Status codes that count as a redirect (when a location header is present).
_REDIRECT_STATUS_CODES = (
    http_client.MOVED_PERMANENTLY,
    http_client.FOUND,
    http_client.SEE_OTHER,
    http_client.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)

# Fields of the retry-argument tuple handed to retry functions:
#   http: An httplib2.Http instance.
#   http_request: A http_wrapper.Request.
#   exc: Exception being raised.
#   num_retries: Number of retries consumed; used for exponential backoff.
#   max_retry_wait: Maximum number of seconds to wait when retrying.
#   total_wait_sec: Seconds elapsed since the first attempt was started.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs', ['http', 'http_request', 'exc', 'num_retries',
                           'max_retry_wait', 'total_wait_sec'])


@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
    """Temporarily change the value of httplib2.debuglevel, if necessary.

    If http_request has a `loggable_body` distinct from `body`, then we
    need to prevent httplib2 from logging the full body. This sets
    httplib2.debuglevel for the duration of the `with` block; however,
    that alone won't change the value of existing HTTP connections. If
    an httplib2.Http object is provided, we'll also change the level on
    any cached connections attached to it.

    The previous levels are restored when the `with` block exits, whether
    it exits normally or by raising.

    Args:
      http_request: a Request we're logging.
      level: (int) the debuglevel for logging.
      http: (optional) an httplib2.Http whose connections we should
          set the debuglevel on. Objects without a `connections`
          attribute are tolerated; only the module-level debuglevel is
          changed for them.

    Yields:
      None.
    """
    if http_request.loggable_body is None:
        # Nothing to hide from the logs; leave every debuglevel untouched.
        yield
        return
    old_level = httplib2.debuglevel
    http_levels = {}
    httplib2.debuglevel = level
    try:
        connections = getattr(http, 'connections', None) or {}
        for connection_key, connection in connections.items():
            # httplib2 stores two kinds of values in this dict, connection
            # classes and instances. Since the connection types are all
            # old-style classes, we can't easily distinguish by connection
            # type -- so instead we use the key pattern.
            if ':' not in connection_key:
                continue
            http_levels[connection_key] = connection.debuglevel
            connection.set_debuglevel(level)
        yield
    finally:
        # Runs on normal exit and when the with-body raises, so a failed
        # request cannot leave the altered debug levels behind.
        httplib2.debuglevel = old_level
        # Re-read the mapping: connections may have been dropped or rebuilt
        # while the with-body ran.
        current = getattr(http, 'connections', None) or {}
        for connection_key, saved_level in http_levels.items():
            if connection_key in current:
                current[connection_key].set_debuglevel(saved_level)


class Request(object):

    """Class encapsulating the data for an HTTP request.

    Attributes:
      url: URL the request is sent to.
      http_method: HTTP method name, 'GET' by default.
      headers: dict of request headers; the 'content-length' entry is
          maintained by the `body` setter.
      body: request payload, or None for no body.
      loggable_body: stand-in for `body` to use when logging, or None if
          the body itself may be logged.
    """

    def __init__(self, url='', http_method='GET', headers=None, body=''):
        self.url = url
        self.http_method = http_method
        self.headers = headers or {}
        self.__body = None
        self.__loggable_body = None
        # Assigned last: the body setter updates self.headers.
        self.body = body

    @property
    def loggable_body(self):
        return self.__loggable_body

    @loggable_body.setter
    def loggable_body(self, value):
        if self.body is None:
            raise exceptions.RequestError(
                'Cannot set loggable body on request with no body')
        self.__loggable_body = value

    @property
    def body(self):
        return self.__body

    @body.setter
    def body(self, value):
        """Sets the request body; handles logging and length measurement."""
        self.__body = value
        if value is not None:
            # Avoid calling len() which cannot exceed 4GiB in 32-bit python.
            body_length = getattr(
                self.__body, 'length', None) or len(self.__body)
            self.headers['content-length'] = str(body_length)
        else:
            self.headers.pop('content-length', None)
        # This line ensures we don't try to print large requests.
        if not isinstance(value, (type(None), six.string_types)):
            self.loggable_body = '<media body>'


# Note: currently the order of fields here is important, since we want
# to be able to pass in the result from httplib2.request.
class Response(collections.namedtuple(
        'HttpResponse', ['info', 'content', 'request_url'])):

    """Class encapsulating data for an HTTP response.

    Fields:
      info: mapping of response headers; the status is read from its
          'status' entry.
      content: the response body.
      request_url: URL of the request that produced this response.
    """
    __slots__ = ()

    def __len__(self):
        return self.length

    @property
    def length(self):
        """Return the length of this response.

        We expose this as an attribute since using len() directly can fail
        for responses larger than sys.maxint.

        Returns:
          Response length (as int or long)
        """
        def ProcessContentRange(content_range):
            # Parses a value shaped like '<unit> <start>-<end>/<total>' and
            # returns the number of bytes in the inclusive range.
            _, _, range_spec = content_range.partition(' ')
            byte_range, _, _ = range_spec.partition('/')
            start, _, end = byte_range.partition('-')
            return int(end) - int(start) + 1

        if '-content-encoding' in self.info and 'content-range' in self.info:
            # httplib2 rewrites content-length in the case of a compressed
            # transfer; we can't trust the content-length header in that
            # case, but we *can* trust content-range, if it's present.
            return ProcessContentRange(self.info['content-range'])
        elif 'content-length' in self.info:
            return int(self.info.get('content-length'))
        elif 'content-range' in self.info:
            return ProcessContentRange(self.info['content-range'])
        return len(self.content)

    @property
    def status_code(self):
        return int(self.info['status'])

    @property
    def retry_after(self):
        # Implicitly None when the header is absent.
        if 'retry-after' in self.info:
            return int(self.info['retry-after'])

    @property
    def is_redirect(self):
        return (self.status_code in _REDIRECT_STATUS_CODES and
                'location' in self.info)


def CheckResponse(response):
    """Raise if the response is missing or signals a retryable failure.

    Args:
      response: A Response, or None.

    Raises:
      RequestError: if response is None.
      BadStatusCodeError: if the status code is 429 or at least 500.
      RetryAfterError: if the response carries a non-zero retry-after value.
    """
    if response is None:
        # Caller shouldn't call us if the response is None, but handle anyway.
        # There is no response to read a URL from, so the message omits it.
        raise exceptions.RequestError(
            'Request did not return a response.')
    elif (response.status_code >= 500 or
          response.status_code == TOO_MANY_REQUESTS):
        raise exceptions.BadStatusCodeError.FromResponse(response)
    elif response.retry_after:
        raise exceptions.RetryAfterError.FromResponse(response)


def RebuildHttpConnections(http):
    """Rebuilds all http connections in the httplib2.Http instance.

    httplib2 overloads the map in http.connections to contain two different
    types of values:
    { scheme string:  connection class } and
    { scheme + authority string : actual http connection }
    Here we remove all of the entries for actual connections so that on the
    next request httplib2 will rebuild them from the connection types.

    Args:
      http: An httplib2.Http instance. Objects with no (or an empty)
          `connections` attribute are left untouched.
    """
    if getattr(http, 'connections', None):
        # Iterate over a snapshot of the keys since entries are deleted.
        for conn_key in list(http.connections.keys()):
            if ':' in conn_key:
                del http.connections[conn_key]


def RethrowExceptionHandler(*unused_args):
    """Retry handler that re-raises the exception currently being handled.

    Args:
      *unused_args: ignored.
    """
    # pylint: disable=misplaced-bare-raise
    raise


def HandleExceptionsAndRebuildHttpConnections(retry_args):
    """Exception handler for http failures.

    This catches known failures and rebuilds the underlying HTTP connections,
    then sleeps before returning so that the caller can retry.

    Args:
      retry_args: An ExceptionRetryArgs tuple.

    Raises:
      The exception in retry_args.exc, if it is not one of the known
      retryable failures.
    """
    # If the server indicates how long to wait, use that value.  Otherwise,
    # calculate the wait time on our own.
    retry_after = None

    # Transport failures
    if isinstance(retry_args.exc, (http_client.BadStatusLine,
                                   http_client.IncompleteRead,
                                   http_client.ResponseNotReady)):
        logging.debug('Caught HTTP error %s, retrying: %s',
                      type(retry_args.exc).__name__, retry_args.exc)
    # gaierror and timeout are subclasses of socket.error, so they have to be
    # tested first or their branches could never be taken.
    elif isinstance(retry_args.exc, socket.gaierror):
        logging.debug(
            'Caught socket address error, retrying: %s', retry_args.exc)
    elif isinstance(retry_args.exc, socket.timeout):
        logging.debug(
            'Caught socket timeout error, retrying: %s', retry_args.exc)
    elif isinstance(retry_args.exc, socket.error):
        logging.debug('Caught socket error, retrying: %s', retry_args.exc)
    elif isinstance(retry_args.exc, httplib2.ServerNotFoundError):
        logging.debug(
            'Caught server not found error, retrying: %s', retry_args.exc)
    elif isinstance(retry_args.exc, ValueError):
        # oauth2client tries to JSON-decode the response, which can result
        # in a ValueError if the response was invalid. Until that is fixed in
        # oauth2client, need to handle it here.
        logging.debug('Response content was invalid (%s), retrying',
                      retry_args.exc)
    elif (isinstance(retry_args.exc, TokenRefreshError) and
          hasattr(retry_args.exc, 'status') and
          (retry_args.exc.status == TOO_MANY_REQUESTS or
           retry_args.exc.status >= 500)):
        logging.debug(
            'Caught transient credential refresh error (%s), retrying',
            retry_args.exc)
    elif isinstance(retry_args.exc, exceptions.RequestError):
        logging.debug('Request returned no response, retrying')
    # API-level failures
    elif isinstance(retry_args.exc, exceptions.BadStatusCodeError):
        logging.debug('Response returned status %s, retrying',
                      retry_args.exc.status_code)
    elif isinstance(retry_args.exc, exceptions.RetryAfterError):
        logging.debug('Response returned a retry-after header, retrying')
        retry_after = retry_args.exc.retry_after
    else:
        raise retry_args.exc
    RebuildHttpConnections(retry_args.http)
    logging.debug('Retrying request to url %s after exception %s',
                  retry_args.http_request.url, retry_args.exc)
    time.sleep(
        retry_after or util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait))


def MakeRequest(http, http_request, retries=7, max_retry_wait=60,
                redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
    """Send http_request via the given http, performing error/retry handling.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      retries: (int, default 7) Retry budget for retryable replies (such as
          429 or 5XX); the request is attempted until the number of failed
          attempts reaches this value.
      max_retry_wait: (int, default 60) Maximum number of seconds to wait
          when retrying.
      redirections: (int, default 5) Number of redirects to follow.
      retry_func: Function to handle retries on exceptions. Argument is an
          ExceptionRetryArgs tuple.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response as its only argument.

    Raises:
      The exception from the last attempt once the retry budget is used up,
      or whatever retry_func raises for an exception it does not handle.

    Returns:
      A Response object.

    """
    retry = 0
    first_req_time = time.time()
    # Provide compatibility for breaking change in httplib2 0.16.0+:
    # https://github.com/googleapis/google-api-python-client/issues/803
    if hasattr(http, 'redirect_codes'):
        http.redirect_codes = set(http.redirect_codes) - {308}
    while True:
        try:
            return _MakeRequestNoRetry(
                http, http_request, redirections=redirections,
                check_response_func=check_response_func)
        # retry_func will consume the exception types it handles and raise.
        # pylint: disable=broad-except
        except Exception as e:
            retry += 1
            if retry >= retries:
                raise
            else:
                total_wait_sec = time.time() - first_req_time
                retry_func(ExceptionRetryArgs(http, http_request, e, retry,
                                              max_retry_wait, total_wait_sec))


def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
    """Send http_request via the given http.

    This wrapper exists to handle translation between the plain httplib2
    request/response types and the Request and Response types above.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      redirections: (int, default 5) Number of redirects to follow.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response as its only argument.

    Returns:
      A Response object.

    Raises:
      RequestError if no response could be parsed.

    """
    connection_type = None
    # Handle overrides for connection types.  This is used if the caller
    # wants control over the underlying connection for managing callbacks
    # or hash digestion.
    if getattr(http, 'connections', None):
        url_scheme = parse.urlsplit(http_request.url).scheme
        if url_scheme and url_scheme in http.connections:
            connection_type = http.connections[url_scheme]

    # Custom printing only at debuglevel 4
    new_debuglevel = 4 if httplib2.debuglevel == 4 else 0
    with _Httplib2Debuglevel(http_request, new_debuglevel, http=http):
        info, content = http.request(
            str(http_request.url), method=str(http_request.http_method),
            body=http_request.body, headers=http_request.headers,
            redirections=redirections, connection_type=connection_type)

    if info is None:
        raise exceptions.RequestError()

    response = Response(info, content, http_request.url)
    check_response_func(response)
    return response


# Factories consulted, in registration order, by GetHttp.
_HTTP_FACTORIES = []


def _RegisterHttpFactory(factory):
    """Append `factory` to the list of factories consulted by GetHttp."""
    _HTTP_FACTORIES.append(factory)


def GetHttp(**kwds):
    """Build an http object from the registered factories or httplib2.

    Each registered factory is called with **kwds in registration order; the
    first result that is not None is returned.

    Args:
      **kwds: keyword arguments forwarded to the factories and, if none of
          them produces an object, to httplib2.Http.

    Returns:
      The first non-None factory result, otherwise httplib2.Http(**kwds).
    """
    for factory in _HTTP_FACTORIES:
        http = factory(**kwds)
        if http is not None:
            return http
    return httplib2.Http(**kwds)