• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2015 Google Inc.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""HTTP wrapper for apitools.
18
19This library wraps the underlying http library we use, which is
20currently httplib2.
21"""
22
23import collections
24import contextlib
25import logging
26import socket
27import time
28
29import httplib2
30import oauth2client
31import six
32from six.moves import http_client
33from six.moves.urllib import parse
34
35from apitools.base.py import exceptions
36from apitools.base.py import util
37
# Names exported by a star-import of this module.
__all__ = [
    'CheckResponse',
    'GetHttp',
    'HandleExceptionsAndRebuildHttpConnections',
    'MakeRequest',
    'RebuildHttpConnections',
    'Request',
    'Response',
    'RethrowExceptionHandler',
]
48
49
# 308 and 429 don't have names in httplib.
RESUME_INCOMPLETE = 308
TOO_MANY_REQUESTS = 429
# Status codes that Response.is_redirect treats as a redirect, provided the
# response also carries a 'location' header.
_REDIRECT_STATUS_CODES = (
    http_client.MOVED_PERMANENTLY,
    http_client.FOUND,
    http_client.SEE_OTHER,
    http_client.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)
60
# Argument bundle handed to a retry_func by MakeRequest.
#
# http: An httplib2.Http instance.
# http_request: A http_wrapper.Request.
# exc: Exception being raised.
# num_retries: Number of retries consumed; used for exponential backoff.
# max_retry_wait: Upper bound, passed as max_wait when the backoff delay
#     is calculated.
# total_wait_sec: Seconds elapsed since MakeRequest started its first
#     attempt.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs', ['http', 'http_request', 'exc', 'num_retries',
                           'max_retry_wait', 'total_wait_sec'])
68
69
@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
    """Temporarily change the value of httplib2.debuglevel, if necessary.

    If http_request has a `loggable_body` set, then we need to prevent
    httplib2 from logging the full body. This sets httplib2.debuglevel for
    the duration of the `with` block; however, that alone won't change the
    value of existing HTTP connections. If an httplib2.Http object is
    provided, we'll also change the level on any cached connections
    attached to it.

    The previous levels are restored even when the `with` body raises, so a
    failed request cannot leave the module or its cached connections stuck
    at the temporary level.

    Args:
      http_request: a Request we're logging.
      level: (int) the debuglevel for logging.
      http: (optional) an httplib2.Http whose connections we should
        set the debuglevel on. Objects without a `connections` attribute
        are accepted; only the module-level debuglevel is changed for them.

    Yields:
      None.
    """
    if http_request.loggable_body is None:
        # Nothing to hide from the log; leave every debuglevel untouched.
        yield
        return
    old_level = httplib2.debuglevel
    # Maps connection key -> debuglevel that connection had on entry.
    http_levels = {}
    httplib2.debuglevel = level
    try:
        connections = getattr(http, 'connections', None) or {}
        for connection_key, connection in connections.items():
            # httplib2 stores two kinds of values in this dict, connection
            # classes and instances. Since the connection types are all
            # old-style classes, we can't easily distinguish by connection
            # type -- so instead we use the key pattern.
            if ':' not in connection_key:
                continue
            http_levels[connection_key] = connection.debuglevel
            connection.set_debuglevel(level)
        yield
    finally:
        httplib2.debuglevel = old_level
        # Re-read the mapping: connections may have been dropped while the
        # `with` body ran, and those need no restoring.
        connections = getattr(http, 'connections', None) or {}
        for connection_key, saved_level in http_levels.items():
            if connection_key in connections:
                connections[connection_key].set_debuglevel(saved_level)
112
113
class Request(object):

    """An outgoing HTTP request: url, method, headers and body.

    Assigning `body` keeps the 'content-length' header in step with the
    body, and `loggable_body` holds an optional stand-in to log in place
    of the real body.
    """

    def __init__(self, url='', http_method='GET', headers=None, body=''):
        self.url = url
        self.http_method = http_method
        self.headers = headers if headers else {}
        self.__body = None
        self.__loggable_body = None
        # Go through the property so content-length gets computed.
        self.body = body

    @property
    def loggable_body(self):
        """The stand-in to log instead of the body, or None if unset."""
        return self.__loggable_body

    @loggable_body.setter
    def loggable_body(self, value):
        if self.body is not None:
            self.__loggable_body = value
            return
        raise exceptions.RequestError(
            'Cannot set loggable body on request with no body')

    @property
    def body(self):
        """The request body, or None when the request has no body."""
        return self.__body

    @body.setter
    def body(self, value):
        """Store the body and update content-length and loggable_body."""
        self.__body = value
        if value is None:
            self.headers.pop('content-length', None)
            return
        # Prefer a `length` attribute when the body has a truthy one:
        # len() cannot exceed 4GiB in 32-bit python.
        declared_length = getattr(value, 'length', None)
        size = declared_length if declared_length else len(value)
        self.headers['content-length'] = str(size)
        # Anything that is not a plain string is logged as a placeholder
        # so that large requests are never printed.
        if not isinstance(value, six.string_types):
            self.loggable_body = '<media body>'
155
156
157# Note: currently the order of fields here is important, since we want
158# to be able to pass in the result from httplib2.request.
class Response(collections.namedtuple(
        'HttpResponse', ['info', 'content', 'request_url'])):

    """An HTTP response: header info, content and the requested url."""
    __slots__ = ()

    def __len__(self):
        return self.length

    @property
    def length(self):
        """Return the length of this response.

        Exposed as a property because len() itself can fail for responses
        larger than sys.maxint.

        Returns:
          Response length (as int or long)
        """
        def RangeSize(content_range):
            # '<unit> <start>-<end>/<total>' -> number of bytes covered.
            byte_range = content_range.partition(' ')[2].partition('/')[0]
            first, _, last = byte_range.partition('-')
            return int(last) - int(first) + 1

        info = self.info
        has_range = 'content-range' in info
        # httplib2 rewrites content-length in the case of a compressed
        # transfer; we can't trust the content-length header in that
        # case, but we *can* trust content-range, if it's present.
        if has_range and '-content-encoding' in info:
            return RangeSize(info['content-range'])
        if 'content-length' in info:
            return int(info['content-length'])
        if has_range:
            return RangeSize(info['content-range'])
        return len(self.content)

    @property
    def status_code(self):
        """The 'status' entry of info, as an int."""
        return int(self.info['status'])

    @property
    def retry_after(self):
        """The 'retry-after' entry of info as an int, or None if absent."""
        if 'retry-after' not in self.info:
            return None
        return int(self.info['retry-after'])

    @property
    def is_redirect(self):
        """True for a redirect status code with a 'location' header."""
        if self.status_code not in _REDIRECT_STATUS_CODES:
            return False
        return 'location' in self.info
208
209
def CheckResponse(response):
    """Raise if the response indicates that the request should be retried.

    Args:
      response: A Response, or None if no response was obtained.

    Raises:
      exceptions.RequestError: if response is None.
      exceptions.BadStatusCodeError: if the status code is 5XX or 429.
      exceptions.RetryAfterError: if the response has a truthy retry_after.
    """
    if response is None:
        # Caller shouldn't call us if the response is None, but handle anyway.
        # There is no response object to take a url from, so the message
        # cannot name one.
        raise exceptions.RequestError('Request did not return a response.')
    elif (response.status_code >= 500 or
          response.status_code == TOO_MANY_REQUESTS):
        raise exceptions.BadStatusCodeError.FromResponse(response)
    elif response.retry_after:
        raise exceptions.RetryAfterError.FromResponse(response)
221
222
def RebuildHttpConnections(http):
    """Drop every cached connection held by the httplib2.Http instance.

    The http.connections map holds two different kinds of entries:
    { scheme string:  connection class } and
    { scheme + authority string : actual http connection }
    Only the second kind (keys containing ':') is removed, so that the
    next request makes httplib2 rebuild those connections from the
    connection types that remain.

    Args:
      http: An httplib2.Http instance.
    """
    connections = getattr(http, 'connections', None)
    if not connections:
        return
    # Collect the keys first; the map is modified while we delete.
    live_keys = [key for key in connections.keys() if ':' in key]
    for key in live_keys:
        del connections[key]
240
241
def RethrowExceptionHandler(*unused_args):
    """Handler that ignores its arguments and re-raises the active exception.

    The bare `raise` only has an exception to re-raise when this is called
    while one is being handled, as MakeRequest does when it invokes its
    retry_func from inside an `except` block.
    """
    # pylint: disable=misplaced-bare-raise
    raise
245
246
def HandleExceptionsAndRebuildHttpConnections(retry_args):
    """Exception handler for http failures.

    This catches known failures and rebuilds the underlying HTTP connections.
    For a recognized failure it logs the cause, rebuilds the connections of
    retry_args.http and then sleeps before returning, so that the caller can
    retry. Any other exception is re-raised, which requires this to be called
    while that exception is being handled.

    Args:
      retry_args: An ExceptionRetryArgs tuple.
    """
    # If the server indicates how long to wait, use that value.  Otherwise,
    # calculate the wait time on our own.
    retry_after = None
    exc = retry_args.exc

    # Transport failures
    if isinstance(exc, (http_client.BadStatusLine,
                        http_client.IncompleteRead,
                        http_client.ResponseNotReady)):
        logging.debug('Caught HTTP error %s, retrying: %s',
                      type(exc).__name__, exc)
    # socket.gaierror and socket.timeout are subclasses of socket.error, so
    # they must be tested before it or their branches can never be taken.
    elif isinstance(exc, socket.gaierror):
        logging.debug('Caught socket address error, retrying: %s', exc)
    elif isinstance(exc, socket.timeout):
        logging.debug('Caught socket timeout error, retrying: %s', exc)
    elif isinstance(exc, socket.error):
        logging.debug('Caught socket error, retrying: %s', exc)
    elif isinstance(exc, httplib2.ServerNotFoundError):
        logging.debug('Caught server not found error, retrying: %s', exc)
    elif isinstance(exc, ValueError):
        # oauth2client tries to JSON-decode the response, which can result
        # in a ValueError if the response was invalid. Until that is fixed in
        # oauth2client, need to handle it here.
        logging.debug('Response content was invalid (%s), retrying', exc)
    elif (isinstance(exc,
                     oauth2client.client.HttpAccessTokenRefreshError) and
          (exc.status == TOO_MANY_REQUESTS or exc.status >= 500)):
        logging.debug(
            'Caught transient credential refresh error (%s), retrying', exc)
    elif isinstance(exc, exceptions.RequestError):
        logging.debug('Request returned no response, retrying')
    # API-level failures
    elif isinstance(exc, exceptions.BadStatusCodeError):
        logging.debug('Response returned status %s, retrying',
                      exc.status_code)
    elif isinstance(exc, exceptions.RetryAfterError):
        logging.debug('Response returned a retry-after header, retrying')
        retry_after = exc.retry_after
    else:
        # Not a failure we know how to retry; hand it back to the caller.
        raise  # pylint: disable=misplaced-bare-raise
    RebuildHttpConnections(retry_args.http)
    logging.debug('Retrying request to url %s after exception %s',
                  retry_args.http_request.url, exc)
    time.sleep(
        retry_after or util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait))
306
307
def MakeRequest(http, http_request, retries=7, max_retry_wait=60,
                redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
    """Send http_request via the given http, performing error/retry handling.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      retries: (int, default 7) Limit on failed attempts: once this many
          attempts have failed, the last exception is re-raised instead of
          being passed to retry_func. The request is always attempted at
          least once.
      max_retry_wait: (int, default 60) Maximum number of seconds to wait
          when retrying; forwarded to retry_func in the ExceptionRetryArgs.
      redirections: (int, default 5) Number of redirects to follow.
      retry_func: Function to handle retries on exceptions. Argument is an
          ExceptionRetryArgs tuple. It returns to request another attempt
          and raises to abort.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response as its only argument.

    Raises:
      Whatever the final failed attempt raised once the limit is reached,
      or whatever retry_func raises.

    Returns:
      A Response object.

    """
    started_at = time.time()
    failures = 0
    while True:
        try:
            response = _MakeRequestNoRetry(
                http, http_request, redirections=redirections,
                check_response_func=check_response_func)
        # retry_func decides which exception types are retryable and raises
        # for the rest, hence the broad catch here.
        # pylint: disable=broad-except
        except Exception as err:
            failures += 1
            if failures >= retries:
                raise
            elapsed_sec = time.time() - started_at
            retry_func(ExceptionRetryArgs(
                http=http, http_request=http_request, exc=err,
                num_retries=failures, max_retry_wait=max_retry_wait,
                total_wait_sec=elapsed_sec))
        else:
            return response
352
353
def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
    """Send http_request via the given http, exactly once.

    This is the translation layer between the plain httplib2
    request/response types and the Request and Response types of this
    module.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      redirections: (int, default 5) Number of redirects to follow.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response as its only argument.

    Returns:
      A Response object.

    Raises:
      RequestError if no response could be parsed.

    """
    # Handle overrides for connection types.  This is used if the caller
    # wants control over the underlying connection for managing callbacks
    # or hash digestion.
    connection_type = None
    registered = getattr(http, 'connections', None)
    if registered:
        scheme = parse.urlsplit(http_request.url).scheme
        if scheme and scheme in registered:
            connection_type = registered[scheme]

    # Custom printing only at debuglevel 4
    debuglevel = 0
    if httplib2.debuglevel == 4:
        debuglevel = 4

    with _Httplib2Debuglevel(http_request, debuglevel, http=http):
        info, content = http.request(
            str(http_request.url),
            method=str(http_request.http_method),
            body=http_request.body,
            headers=http_request.headers,
            redirections=redirections,
            connection_type=connection_type)

    if info is None:
        raise exceptions.RequestError()

    result = Response(info, content, http_request.url)
    check_response_func(result)
    return result
399
400
# Factories that GetHttp consults, in registration order, before it falls
# back to constructing an httplib2.Http itself.
_HTTP_FACTORIES = []


def _RegisterHttpFactory(factory):
    """Add `factory` to the end of the list of factories GetHttp tries.

    Args:
      factory: A callable taking the keyword arguments given to GetHttp.
        GetHttp returns its result unless that result is None.
    """
    _HTTP_FACTORIES.append(factory)
406
407
def GetHttp(**kwds):
    """Build an http object from the registered factories or httplib2.

    Each registered factory is called with `kwds`, in registration order,
    until one returns something other than None; later factories are then
    not called. If none does, an httplib2.Http built from `kwds` is returned.

    Args:
      **kwds: Keyword arguments passed through to the factory or to
        httplib2.Http.

    Returns:
      The first non-None factory result, or an httplib2.Http instance.
    """
    # Lazy on purpose: a factory is only invoked once its turn comes.
    candidates = (factory(**kwds) for factory in _HTTP_FACTORIES)
    chosen = next(
        (candidate for candidate in candidates if candidate is not None),
        None)
    if chosen is None:
        return httplib2.Http(**kwds)
    return chosen
414