• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software
4# Foundation; All Rights Reserved
5
6"""A HTTPSConnection/Handler with additional proxy and cert validation features.
7
8In particular, monkey patches in Python r74203 to provide support for CONNECT
9proxies and adds SSL cert validation if the ssl module is present.
10"""
11
12__author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)"
13
14import base64
15import httplib
16import logging
17import re
18import socket
19import urllib2
20
21from urllib import splittype
22from urllib import splituser
23from urllib import splitpasswd
24
class InvalidCertificateException(httplib.HTTPException):
  """Raised when a certificate is provided with an invalid hostname."""

  def __init__(self, host, cert, reason):
    """Constructor.

    Args:
      host: The hostname the connection was made to.
      cert: The SSL certificate (as a dictionary) the host returned.
      reason: A short description of why the certificate was rejected.
    """
    # Forward the constructor arguments to the base class so that self.args
    # is populated.  copy and pickle rebuild an exception by calling its
    # class with self.args, which fails for this three-argument constructor
    # when self.args is empty.
    httplib.HTTPException.__init__(self, host, cert, reason)
    self.host = host
    self.cert = cert
    self.reason = reason

  def __str__(self):
    """Returns the user-facing description of the certificate failure."""
    return ('Host %s returned an invalid certificate (%s): %s\n'
            'To learn more, see '
            'http://code.google.com/appengine/kb/general.html#rpcssl' %
            (self.host, self.reason, self.cert))
45
def can_validate_certs():
  """Report whether the ssl module can be imported.

  Returns:
    True when `import ssl` succeeds (certificate validation is possible),
    False when it raises ImportError.
  """
  try:
    import ssl
  except ImportError:
    return False
  else:
    return True
53
def _create_fancy_connection(tunnel_host=None, key_file=None,
                             cert_file=None, ca_certs=None):
  """Builds an HTTPSConnection subclass preset with proxy and SSL settings.

  The HTTPHandler creates the connection instance in the middle of do_open,
  so per-request settings cannot be handed to the constructor.  They are
  captured by the returned class through this function's scope instead.

  Args:
    tunnel_host: The "host[:port]" to reach through a CONNECT proxy, or None
      to connect directly.
    key_file: Value passed to the SSL layer as the key file, or None.
    cert_file: Value passed to the SSL layer as the certificate file, or None.
    ca_certs: Value passed to the SSL layer as the CA bundle.  When it is set
      the peer certificate is required and its hostname is checked.

  Returns:
    A subclass of httplib.HTTPSConnection.
  """

  class PresetProxyHTTPSConnection(httplib.HTTPSConnection):
    """An HTTPS connection that uses a proxy defined by the enclosing scope."""

    def __init__(self, *args, **kwargs):
      httplib.HTTPSConnection.__init__(self, *args, **kwargs)

      self._tunnel_host = tunnel_host
      if tunnel_host:
        logging.debug("Creating preset proxy https conn: %s", tunnel_host)

      self.key_file = key_file
      self.cert_file = cert_file
      self.ca_certs = ca_certs
      try:
        import ssl
      except ImportError:
        # Without the ssl module connect() falls back to socket.ssl, which
        # never reads cert_reqs, so it is simply left unset.
        pass
      else:
        if self.ca_certs:
          self.cert_reqs = ssl.CERT_REQUIRED
        else:
          self.cert_reqs = ssl.CERT_NONE

    def _tunnel(self):
      """Issues a CONNECT for the tunnel host over the open proxy socket.

      Raises:
        socket.error: If the proxy answers with anything other than 200.
      """
      # From here on self.host/self.port name the tunnelled host rather than
      # the proxy the socket is connected to.
      self._set_hostport(self._tunnel_host, None)
      logging.info("Connecting through tunnel to: %s:%d",
                   self.host, self.port)
      self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
      response = self.response_class(self.sock, strict=self.strict,
                                     method=self._method)
      (_, code, message) = response._read_status()

      if code != 200:
        self.close()
        raise socket.error("Tunnel connection failed: %d %s" % (
            code, message.strip()))

      # Discard the proxy's response headers up to the blank line.  An empty
      # read means the proxy closed the connection; stopping there keeps the
      # loop from spinning forever on EOF.
      while True:
        line = response.fp.readline()
        if not line or line in ("\r\n", "\n"):
          break

    def _get_valid_hosts_for_cert(self, cert):
      """Returns a list of valid host globs for an SSL certificate.

      Args:
        cert: A dictionary representing an SSL certificate.
      Returns:
        list: A list of valid host globs.  Empty when the certificate is
          empty or carries no usable names.
      """
      if not cert:
        return []
      if 'subjectAltName' in cert:
        return [x[1] for x in cert['subjectAltName'] if x[0].lower() == 'dns']
      # Otherwise fall back to the commonName fields of the subject.
      return [x[0][1] for x in cert.get('subject', ())
              if x[0][0].lower() == 'commonname']

    def _validate_certificate_hostname(self, cert, hostname):
      """Validates that a given hostname is valid for an SSL certificate.

      Args:
        cert: A dictionary representing an SSL certificate.
        hostname: The hostname to test.
      Returns:
        bool: Whether or not the hostname is valid for this certificate.
      """
      for host in self._get_valid_hosts_for_cert(cert):
        # Convert the glob-style hostname expression (eg, '*.google.com') into
        # a regular expression.  Everything is escaped first so that only '*'
        # keeps a special meaning; other metacharacters in a certificate name
        # (eg, '|') must not widen the match.
        host_re = re.escape(host).replace('\\*', '[^.]*')
        # \Z rather than $: '$' would also accept a trailing newline.
        if re.match('^%s\\Z' % (host_re,), hostname, re.I):
          return True
      return False

    def connect(self):
      """Connects the socket, tunnels if configured, and wraps it in SSL.

      Raises:
        InvalidCertificateException: If certificates are required and the
          peer certificate does not cover the host being connected to.
      """
      # TODO(frew): When we drop support for <2.6 (in the far distant future),
      # change this to socket.create_connection.
      self.sock = _create_connection((self.host, self.port))

      if self._tunnel_host:
        self._tunnel()

      # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl,
      # with fallback.
      try:
        import ssl
      except ImportError:
        legacy_ssl = socket.ssl(self.sock,
                                keyfile=self.key_file,
                                certfile=self.cert_file)
        self.sock = httplib.FakeSocket(self.sock, legacy_ssl)
        return

      self.sock = ssl.wrap_socket(self.sock,
                                  keyfile=self.key_file,
                                  certfile=self.cert_file,
                                  ca_certs=self.ca_certs,
                                  cert_reqs=self.cert_reqs)

      if self.cert_reqs & ssl.CERT_REQUIRED:
        cert = self.sock.getpeercert()
        hostname = self.host
        # Drop a single trailing ":port" if one is present.  (The earlier
        # split(':', 0) asked for zero splits and so never removed anything.)
        if hostname.count(':') == 1:
          hostname = hostname.split(':', 1)[0]
        if not self._validate_certificate_hostname(cert, hostname):
          # Do not leave the socket open on a rejected peer.
          self.close()
          raise InvalidCertificateException(hostname, cert,
                                            'hostname mismatch')

  return PresetProxyHTTPSConnection
166
167
# Here to the end of _create_connection is adapted from Python 2.6's socket.py.
_GLOBAL_DEFAULT_TIMEOUT = object()


def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT):
  """Connect to *address* and return the socket object.

  Convenience function.  Connect to *address* (a 2-tuple ``(host,
  port)``) and return the socket object.  Passing the optional
  *timeout* parameter will set the timeout on the socket instance
  before attempting to connect.  If no *timeout* is supplied, the
  global default timeout setting returned by :func:`getdefaulttimeout`
  is used.

  Every address returned by getaddrinfo is tried in order; the first one
  that connects wins.

  Raises:
    socket.error: The error from the last failed attempt, or a
      "getaddrinfo returns an empty list" error when there was nothing to try.
  """
  # Local import: sys.exc_info() is used instead of the "except X, e" form so
  # the statement is valid on every Python version, including pre-2.6.
  import sys

  host, port = address
  last_error = None
  for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
    af, socktype, proto, _canonname, sa = res
    sock = None
    try:
      sock = socket.socket(af, socktype, proto)
      if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
        sock.settimeout(timeout)
      sock.connect(sa)
      return sock

    except socket.error:
      last_error = sys.exc_info()[1]
      # Do not leak the half-opened socket before trying the next address.
      if sock is not None:
        sock.close()

  if last_error is not None:
    raise last_error
  raise socket.error("getaddrinfo returns an empty list")
200
201
class FancyRequest(urllib2.Request):
  """A urllib2.Request that can be sent through a CONNECT proxy.

  Besides the usual request state it records the host to tunnel to and the
  SSL key/certificate/CA values to use for the connection.
  """

  def __init__(self, *args, **kwargs):
    """Creates the request; all arguments go straight to urllib2.Request."""
    urllib2.Request.__init__(self, *args, **kwargs)
    # Nothing is tunnelled and no SSL material is set until set_proxy() or
    # set_ssl_info() is called.
    self._tunnel_host = None
    self._key_file = None
    self._cert_file = None
    self._ca_certs = None

  def set_proxy(self, host, type):
    """Routes the request via a proxy, keeping https requests as https.

    Args:
      host: The proxy to send the request to.
      type: The scheme to use when talking to the proxy.
    """
    if self.get_type() != "https" or self._tunnel_host:
      # Plain request, or a tunnel target is already recorded: defer entirely
      # to the stock behaviour.
      urllib2.Request.set_proxy(self, host, type)
      return

    # Remember where the tunnel has to end up before the base class points
    # the request at the proxy.
    self._tunnel_host = self.get_host()
    original_type = self.get_type()
    urllib2.Request.set_proxy(self, host, type)
    # The base class overwrote self.type with the proxy's type; put the
    # original one back so the request is still treated as https.
    self.type = original_type

  def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None):
    """Stores the SSL key file, certificate file and CA bundle to use."""
    self._key_file, self._cert_file, self._ca_certs = (
        key_file, cert_file, ca_certs)
229
230
class FancyProxyHandler(urllib2.ProxyHandler):
  """A ProxyHandler that works with CONNECT-enabled proxies."""

  # Logic taken from /usr/lib/python2.5/urllib2.py
  def _parse_proxy(self, proxy):
    """Splits a proxy spec into (scheme, user, password, host/port).

    The spec is either a bare authority or a URL.  A URL must have an
    authority (host:port) component, which according to RFC 3986 means two
    slashes after the scheme; 'file:/ftp.example.com/' is therefore rejected.
    Anything after the authority is ignored.

    The first three items of the returned tuple may be None.

    Examples (input -> result):
      'proxy.example.com'
          -> (None, None, None, 'proxy.example.com')
      'proxy.example.com:3128'
          -> (None, None, None, 'proxy.example.com:3128')
      'joe:password@proxy.example.com:3128'
          -> (None, 'joe', 'password', 'proxy.example.com:3128')
      'http://proxy.example.com:3128/'
          -> ('http', None, None, 'proxy.example.com:3128')
      'http://joe:password@proxy.example.com'
          -> ('http', 'joe', 'password', 'proxy.example.com')
      'ftp://joe:password@proxy.example.com/rubbish:3128'
          -> ('ftp', 'joe', 'password', 'proxy.example.com')

    Args:
      proxy: The proxy specification string.

    Returns:
      A (scheme, user, password, hostport) tuple.

    Raises:
      ValueError: If a URL is given whose scheme is not followed by '//'.
    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
      # URL form: an authority is mandatory.
      if not remainder.startswith("//"):
        raise ValueError("proxy URL with no authority: %r" % proxy)
      # For RFC 3986-compliant URLs the path is empty or starts with '/',
      # so the authority runs up to the next slash (or to the end).
      path_start = remainder.find("/", 2)
      if path_start == -1:
        authority = remainder[2:]
      else:
        authority = remainder[2:path_start]
    else:
      # Bare authority: whatever splittype took for a scheme was not one.
      scheme = None
      authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
      return scheme, None, None, hostport
    user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport

  def proxy_open(self, req, proxy, type):
    """Points req at the proxy, leaving https requests for the HTTPS handler.

    Args:
      req: The request being opened.
      proxy: The proxy specification, as accepted by _parse_proxy.
      type: The scheme this handler method was registered for.

    Returns:
      None for https requests; otherwise the result of the stock
      urllib2.ProxyHandler.proxy_open.
    """
    # The setup below mirrors Python 2.6's urllib2.  It is idempotent, so the
    # superclass call at the end behaves normally when it repeats it.
    request_type = req.get_type()
    proxy_type, user, password, hostport = self._parse_proxy(proxy)
    if proxy_type is None:
      proxy_type = request_type
    if user and password:
      credentials = "%s:%s" % (urllib2.unquote(user),
                               urllib2.unquote(password))
      encoded = base64.b64encode(credentials).strip()
      # add_header replaces any earlier value of the same header.
      req.add_header("Proxy-authorization", "Basic " + encoded)
    req.set_proxy(urllib2.unquote(hostport), proxy_type)

    # This is the one departure from the stock handler: https requests stop
    # here instead of being opened by the superclass.
    if request_type == "https":
      return None
    return urllib2.ProxyHandler.proxy_open(self, req, proxy, type)
327
328
class FancyHTTPSHandler(urllib2.HTTPSHandler):
  """An HTTPSHandler that works with CONNECT-enabled proxies."""

  def do_open(self, http_class, req):
    """Opens req using a connection class built from the request's settings.

    Args:
      http_class: Unused; the connection class is always the one produced by
        _create_fancy_connection.
      req: The request to open.  It must carry the _tunnel_host, _key_file,
        _cert_file and _ca_certs attributes (FancyRequest provides them).

    Returns:
      Whatever urllib2.HTTPSHandler.do_open returns.

    Raises:
      InvalidCertificateException: If opening failed with an ssl.SSLError
        whose first argument is 1.
      urllib2.URLError: For any other URLError raised while opening.
    """
    # Local import: sys.exc_info() is used instead of the "except X, e" form
    # so the statement is valid on every Python version, including 2.5.
    import sys

    connection_class = _create_fancy_connection(req._tunnel_host,
                                                req._key_file,
                                                req._cert_file,
                                                req._ca_certs)
    try:
      return urllib2.HTTPSHandler.do_open(self, connection_class, req)
    except urllib2.URLError:
      url_error = sys.exc_info()[1]
      try:
        import ssl
      except ImportError:
        ssl = None

      # Intentionally very specific so as to opt for false negatives
      # rather than false positives.
      if (ssl is not None and
          type(url_error.reason) == ssl.SSLError and
          url_error.reason.args[0] == 1):
        # When the request is tunnelled, req.host names the proxy; the
        # certificate came from the tunnelled host, so report that one.
        failed_host = req._tunnel_host or req.host
        # Display the reason to the user. Need to use args for python2.5
        # compat.
        raise InvalidCertificateException(failed_host, '',
                                          url_error.reason.args[1])

      # Re-raise by name: a bare "raise" could pick up the ImportError handled
      # above instead of the URLError on Python 2.
      raise url_error
356
357
# Tunneling has to survive redirects, so the redirect handler is overridden
# to carry the proxy information over to the follow-up request.
class FancyRedirectHandler(urllib2.HTTPRedirectHandler):
  """A redirect handler that persists CONNECT-enabled proxy information."""

  def redirect_request(self, req, *args, **kwargs):
    """Builds the follow-up request and copies tunnel/SSL settings onto it.

    Args:
      req: The request that was answered with a redirect.
      *args: Passed through to urllib2.HTTPRedirectHandler.redirect_request.
      **kwargs: Passed through as well.

    Returns:
      Whatever the base class returned; when that is a urllib2.Request it has
      been updated in place.
    """
    new_req = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    if not isinstance(new_req, urllib2.Request):
      # Nothing to decorate.
      return new_req

    # This does the same thing as FancyRequest.set_proxy, but here we only
    # have a plain Request to work with, so it was this or copying everything
    # over piecemeal.
    #
    # Tunneling is not carried from an http request over to an https request,
    # because an http request does not set _tunnel_host.
    #
    # Also note that in Python < 2.6, you will get an error in
    # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an
    # http proxy, since the proxy type will be set to http instead of https.
    # (FancyRequest, and urllib2.Request in Python >= 2.6, set the proxy type
    # to https.)  Such a urllib2.Request could result from this redirect if
    # you are redirecting from an http request (since an http request does
    # not have _tunnel_host set, and thus the proxy is not set in the code
    # below), and if you have defined a proxy for https in, say,
    # FancyProxyHandler, and that proxy has type http.
    if hasattr(req, "_tunnel_host") and new_req.get_type() == "https":
      if req._tunnel_host:
        # The original request was proxied: tunnel to the new target through
        # the same proxy.
        new_req._tunnel_host = new_req.get_host()
        new_req.set_proxy(req.host, "https")
      else:
        # Not proxied; just make sure the attribute exists.
        new_req._tunnel_host = None
      new_req.type = "https"

    if hasattr(req, "_key_file"):
      # Carry the SSL settings along in case this or any later redirect is
      # https.
      new_req._key_file = req._key_file
      new_req._cert_file = req._cert_file
      new_req._ca_certs = req._ca_certs

    return new_req
399