#!/usr/bin/env python
#
# Copyright 2015 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""HTTP wrapper for apitools.

This library wraps the underlying http library we use, which is
currently httplib2.
"""

import collections
import contextlib
import logging
import socket
import time

import httplib2
import oauth2client
# Importing a package does not load its submodules; this module reads
# oauth2client.client.HttpAccessTokenRefreshError, so load it explicitly.
import oauth2client.client
import six
from six.moves import http_client
from six.moves.urllib import parse

from apitools.base.py import exceptions
from apitools.base.py import util

__all__ = [
    'CheckResponse',
    'GetHttp',
    'HandleExceptionsAndRebuildHttpConnections',
    'MakeRequest',
    'RebuildHttpConnections',
    'Request',
    'Response',
    'RethrowExceptionHandler',
]


# 308 and 429 don't have names in httplib.
RESUME_INCOMPLETE = 308
TOO_MANY_REQUESTS = 429
_REDIRECT_STATUS_CODES = (
    http_client.MOVED_PERMANENTLY,
    http_client.FOUND,
    http_client.SEE_OTHER,
    http_client.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)

# http: An httplib2.Http instance.
# http_request: A http_wrapper.Request.
# exc: Exception being raised.
# num_retries: Number of retries consumed; used for exponential backoff.
# max_retry_wait: Maximum number of seconds to wait when retrying.
# total_wait_sec: Seconds elapsed since the first request attempt.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs', ['http', 'http_request', 'exc', 'num_retries',
                           'max_retry_wait', 'total_wait_sec'])


@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
    """Temporarily change the value of httplib2.debuglevel, if necessary.

    If http_request has a `loggable_body` distinct from `body`, then we
    need to prevent httplib2 from logging the full body. This sets
    httplib2.debuglevel for the duration of the `with` block; however,
    that alone won't change the value of existing HTTP connections. If
    an httplib2.Http object is provided, we'll also change the level on
    any cached connections attached to it.

    The previous levels are restored when the `with` block exits, whether
    it completes normally or raises.

    Args:
      http_request: a Request we're logging.
      level: (int) the debuglevel for logging.
      http: (optional) an httplib2.Http whose connections we should
          set the debuglevel on.

    Yields:
      None.
    """
    if http_request.loggable_body is None:
        # Nothing to hide from the logs, so leave every level untouched.
        yield
        return
    old_level = httplib2.debuglevel
    # Maps connection key -> debuglevel the connection had on entry.
    http_levels = {}
    httplib2.debuglevel = level
    try:
        if http is not None:
            for connection_key, connection in http.connections.items():
                # httplib2 stores two kinds of values in this dict,
                # connection classes and instances. Since the connection
                # types are all old-style classes, we can't easily
                # distinguish by connection type -- so instead we use the
                # key pattern.
                if ':' not in connection_key:
                    continue
                http_levels[connection_key] = connection.debuglevel
                connection.set_debuglevel(level)
        yield
    finally:
        # Without the finally clause, an exception raised inside the `with`
        # body (e.g. a socket error during the request) would skip the
        # restore and leave the temporary level in place.
        httplib2.debuglevel = old_level
        # http_levels is only populated when an http object was supplied.
        for connection_key, saved_level in http_levels.items():
            # A connection may have been dropped while the block ran.
            if connection_key in http.connections:
                http.connections[connection_key].set_debuglevel(saved_level)


class Request(object):

    """Class encapsulating the data for an HTTP request.

    Attributes:
      url: The request URL.
      http_method: The HTTP method name.
      headers: Dict of request headers. The 'content-length' entry is
          rewritten every time `body` is assigned.
      body: The request payload (property).
      loggable_body: Replacement text to log in place of `body`, or None
          (property).
    """

    def __init__(self, url='', http_method='GET', headers=None, body=''):
        self.url = url
        self.http_method = http_method
        self.headers = headers or {}
        # Both private fields must exist before the body setter runs,
        # since the setter reads and may assign them.
        self.__body = None
        self.__loggable_body = None
        self.body = body

    @property
    def loggable_body(self):
        """The text to log instead of the real body, or None."""
        return self.__loggable_body

    @loggable_body.setter
    def loggable_body(self, value):
        """Sets the loggable body.

        Raises:
          exceptions.RequestError: if the request currently has no body.
        """
        if self.body is None:
            raise exceptions.RequestError(
                'Cannot set loggable body on request with no body')
        self.__loggable_body = value

    @property
    def body(self):
        """The request payload."""
        return self.__body

    @body.setter
    def body(self, value):
        """Sets the request body; handles logging and length measurement."""
        self.__body = value
        if value is not None:
            # Avoid calling len() which cannot exceed 4GiB in 32-bit python.
            body_length = getattr(
                self.__body, 'length', None) or len(self.__body)
            self.headers['content-length'] = str(body_length)
        else:
            self.headers.pop('content-length', None)
        # This line ensures we don't try to print large requests.
        if not isinstance(value, (type(None), six.string_types)):
            self.loggable_body = '<media body>'


# Note: currently the order of fields here is important, since we want
# to be able to pass in the result from httplib2.request.
class Response(collections.namedtuple(
        'HttpResponse', ['info', 'content', 'request_url'])):

    """Class encapsulating data for an HTTP response.

    Fields:
      info: Mapping of response headers and metadata (including 'status').
      content: The response payload.
      request_url: The URL the request was sent to.
    """
    __slots__ = ()

    def __len__(self):
        return self.length

    @property
    def length(self):
        """Return the length of this response.

        We expose this as an attribute since using len() directly can fail
        for responses larger than sys.maxint.

        Returns:
          Response length (as int or long)
        """
        def ProcessContentRange(content_range):
            # Input looks like '<unit> <start>-<end>/<total>'; the range is
            # inclusive at both ends, hence the + 1.
            _, _, range_spec = content_range.partition(' ')
            byte_range, _, _ = range_spec.partition('/')
            start, _, end = byte_range.partition('-')
            return int(end) - int(start) + 1

        if '-content-encoding' in self.info and 'content-range' in self.info:
            # httplib2 rewrites content-length in the case of a compressed
            # transfer; we can't trust the content-length header in that
            # case, but we *can* trust content-range, if it's present.
            return ProcessContentRange(self.info['content-range'])
        elif 'content-length' in self.info:
            return int(self.info.get('content-length'))
        elif 'content-range' in self.info:
            return ProcessContentRange(self.info['content-range'])
        return len(self.content)

    @property
    def status_code(self):
        """The HTTP status as an int, read from info['status']."""
        return int(self.info['status'])

    @property
    def retry_after(self):
        """The 'retry-after' header as an int, or None if it is absent."""
        if 'retry-after' in self.info:
            return int(self.info['retry-after'])
        return None

    @property
    def is_redirect(self):
        """True for a redirect status that also carries a 'location'."""
        return (self.status_code in _REDIRECT_STATUS_CODES and
                'location' in self.info)


def CheckResponse(response):
    """Raise if the response indicates the request should be retried.

    Args:
      response: A Response, or None.

    Raises:
      exceptions.RequestError: if response is None.
      exceptions.BadStatusCodeError: if the status is 429 or >= 500.
      exceptions.RetryAfterError: if a non-zero retry-after is present.
    """
    if response is None:
        # Caller shouldn't call us if the response is None, but handle anyway.
        # There is no response object here, so no URL can be reported;
        # reading response.request_url would raise AttributeError instead.
        raise exceptions.RequestError(
            'Request did not return a response.')
    elif (response.status_code >= 500 or
          response.status_code == TOO_MANY_REQUESTS):
        raise exceptions.BadStatusCodeError.FromResponse(response)
    elif response.retry_after:
        raise exceptions.RetryAfterError.FromResponse(response)


def RebuildHttpConnections(http):
    """Rebuilds all http connections in the httplib2.Http instance.

    httplib2 overloads the map in http.connections to contain two different
    types of values:
    { scheme string:  connection class } and
    { scheme + authority string : actual http connection }
    Here we remove all of the entries for actual connections so that on the
    next request httplib2 will rebuild them from the connection types.

    Args:
      http: An httplib2.Http instance.
    """
    if getattr(http, 'connections', None):
        # Snapshot the keys: the dict is mutated while we walk it.
        for conn_key in list(http.connections.keys()):
            if ':' in conn_key:
                del http.connections[conn_key]


def RethrowExceptionHandler(*unused_args):
    """Retry handler that never retries; re-raises the active exception.

    Must be called while an exception is being handled.
    """
    # pylint: disable=misplaced-bare-raise
    raise


def HandleExceptionsAndRebuildHttpConnections(retry_args):
    """Exception handler for http failures.

    This catches known failures and rebuilds the underlying HTTP connections.
    For a recognized failure it logs, drops the cached connections, and
    sleeps before returning so the caller can retry. Any other exception is
    re-raised, so this must be called from inside an `except` block.

    Args:
      retry_args: An ExceptionRetryArgs tuple.
    """
    # If the server indicates how long to wait, use that value.  Otherwise,
    # calculate the wait time on our own.
    retry_after = None
    exc = retry_args.exc

    # Transport failures
    if isinstance(exc, (http_client.BadStatusLine,
                        http_client.IncompleteRead,
                        http_client.ResponseNotReady)):
        logging.debug('Caught HTTP error %s, retrying: %s',
                      type(exc).__name__, exc)
    # socket.gaierror and socket.timeout derive from socket.error, so they
    # have to be tested before it or their branches are never reached.
    elif isinstance(exc, socket.gaierror):
        logging.debug(
            'Caught socket address error, retrying: %s', exc)
    elif isinstance(exc, socket.timeout):
        logging.debug(
            'Caught socket timeout error, retrying: %s', exc)
    elif isinstance(exc, socket.error):
        logging.debug('Caught socket error, retrying: %s', exc)
    elif isinstance(exc, httplib2.ServerNotFoundError):
        logging.debug(
            'Caught server not found error, retrying: %s', exc)
    elif isinstance(exc, ValueError):
        # oauth2client tries to JSON-decode the response, which can result
        # in a ValueError if the response was invalid. Until that is fixed in
        # oauth2client, need to handle it here.
        logging.debug('Response content was invalid (%s), retrying',
                      exc)
    elif (isinstance(exc,
                     oauth2client.client.HttpAccessTokenRefreshError) and
          (exc.status == TOO_MANY_REQUESTS or
           exc.status >= 500)):
        logging.debug(
            'Caught transient credential refresh error (%s), retrying',
            exc)
    elif isinstance(exc, exceptions.RequestError):
        logging.debug('Request returned no response, retrying')
    # API-level failures
    elif isinstance(exc, exceptions.BadStatusCodeError):
        logging.debug('Response returned status %s, retrying',
                      exc.status_code)
    elif isinstance(exc, exceptions.RetryAfterError):
        logging.debug('Response returned a retry-after header, retrying')
        retry_after = exc.retry_after
    else:
        raise  # pylint: disable=misplaced-bare-raise
    RebuildHttpConnections(retry_args.http)
    logging.debug('Retrying request to url %s after exception %s',
                  retry_args.http_request.url, exc)
    time.sleep(
        retry_after or util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait))


def MakeRequest(http, http_request, retries=7, max_retry_wait=60,
                redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
    """Send http_request via the given http, performing error/retry handling.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      retries: (int, default 7) Number of retries to attempt on retryable
          replies (such as 429 or 5XX). The exception is re-raised as soon
          as the count of failed attempts reaches this value.
      max_retry_wait: (int, default 60) Maximum number of seconds to wait
          when retrying.
      redirections: (int, default 5) Number of redirects to follow.
      retry_func: Function to handle retries on exceptions. Argument is an
          ExceptionRetryArgs tuple.
      check_response_func: Function to validate the HTTP response.
          Called with a single argument, the Response.

    Raises:
      The exception from the last failed attempt once the retry budget is
      used up, or whatever retry_func raises for an exception it does not
      handle.

    Returns:
      A Response object.

    """
    retry = 0
    first_req_time = time.time()
    while True:
        try:
            return _MakeRequestNoRetry(
                http, http_request, redirections=redirections,
                check_response_func=check_response_func)
        # retry_func will consume the exception types it handles and raise.
        # pylint: disable=broad-except
        except Exception as e:
            retry += 1
            if retry >= retries:
                raise
            else:
                total_wait_sec = time.time() - first_req_time
                retry_func(ExceptionRetryArgs(http, http_request, e, retry,
                                              max_retry_wait, total_wait_sec))


def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
    """Send http_request via the given http.

    This wrapper exists to handle translation between the plain httplib2
    request/response types and the Request and Response types above.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      redirections: (int, default 5) Number of redirects to follow.
      check_response_func: Function to validate the HTTP response.
          Called with a single argument, the Response.

    Returns:
      A Response object.

    Raises:
      RequestError if no response could be parsed.

    """
    connection_type = None
    # Handle overrides for connection types.  This is used if the caller
    # wants control over the underlying connection for managing callbacks
    # or hash digestion.
    if getattr(http, 'connections', None):
        url_scheme = parse.urlsplit(http_request.url).scheme
        if url_scheme and url_scheme in http.connections:
            connection_type = http.connections[url_scheme]

    # Custom printing only at debuglevel 4
    new_debuglevel = 4 if httplib2.debuglevel == 4 else 0
    with _Httplib2Debuglevel(http_request, new_debuglevel, http=http):
        info, content = http.request(
            str(http_request.url), method=str(http_request.http_method),
            body=http_request.body, headers=http_request.headers,
            redirections=redirections, connection_type=connection_type)

    if info is None:
        raise exceptions.RequestError()

    response = Response(info, content, http_request.url)
    check_response_func(response)
    return response


# Factories consulted, in registration order, by GetHttp.
_HTTP_FACTORIES = []


def _RegisterHttpFactory(factory):
    """Append a factory callable to the list consulted by GetHttp."""
    _HTTP_FACTORIES.append(factory)


def GetHttp(**kwds):
    """Build an http object, preferring registered factories.

    Each registered factory is called with **kwds; the first non-None
    result is returned. If none produces one, a plain httplib2.Http built
    from the same keyword arguments is returned.
    """
    for factory in _HTTP_FACTORIES:
        http = factory(**kwds)
        if http is not None:
            return http
    return httplib2.Http(**kwds)