• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""
6A http client with support for https connections with certificate verification.
7
8The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
9and the code is from Lib/ssl.py in python3:
10  http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py
11
12One use case is to download Chromium DEPS file in a secure way:
13  https://src.chromium.org/chrome/trunk/src/DEPS
14
15Notice: python 2.7 or newer is required.
16"""
17
18import cookielib
19import httplib
20import os
21import re
22import socket
23import ssl
24import time
25import urllib
26import urllib2
27
28import http_client
29
30
# Resolve the directory to an absolute path at import time, so the certificate
# bundle is still found if __file__ is relative and the working directory
# changes after this module has been loaded.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# File with the trusted root certificates, located next to this module.
_TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')
33
34
class CertificateError(ValueError):
  """Raised when a certificate fails the hostname verification."""
37
38
def _DNSNameMatch(dn, hostname, max_wildcards=1):
  """Checks whether a certificate DNS name matches the given hostname.

  Matching is done according to RFC 6125, section 6.4.3:
  http://tools.ietf.org/html/rfc6125#section-6.4.3

  Only a wildcard in the left-most label of dn is honored; a '*' in any other
  label is matched literally. Comparison is case-insensitive.

  Args:
    dn: A DNS name taken from the certificate, e.g. '*.example.com'.
    hostname: The hostname to check against dn.
    max_wildcards: Maximum number of '*' allowed in the left-most label.

  Returns:
    True if hostname matches dn, False otherwise (including an empty dn).

  Raises:
    CertificateError: The left-most label of dn contains more than
        max_wildcards wildcards.
  """
  if not dn:
    return False

  parts = dn.split('.')
  leftmost = parts[0]
  remainder = parts[1:]

  wildcards = leftmost.count('*')
  if wildcards > max_wildcards:
    # Issue #17980: avoid denials of service by refusing more
    # than one wildcard per fragment.  A survey of established
    # policy among SSL implementations showed it to be a
    # reasonable choice.
    raise CertificateError(
        'too many wildcards in certificate DNS name: ' + repr(dn))

  # Speed up the common case without wildcards.
  if not wildcards:
    return dn.lower() == hostname.lower()

  pats = []
  # RFC 6125, section 6.4.3, subitem 1.
  # The client SHOULD NOT attempt to match a presented identifier in which
  # the wildcard character comprises a label other than the left-most label.
  if leftmost == '*':
    # When '*' is a fragment by itself, it matches a non-empty dotless
    # fragment.
    pats.append('[^.]+')
  elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
    # RFC 6125, section 6.4.3, subitem 3.
    # The client SHOULD NOT attempt to match a presented identifier
    # where the wildcard character is embedded within an A-label or
    # U-label of an internationalized domain name.
    pats.append(re.escape(leftmost))
  else:
    # Otherwise, '*' matches any dotless string, e.g. www*
    pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))

  # Add the remaining fragments, ignoring any wildcards in them.
  for frag in remainder:
    pats.append(re.escape(frag))

  pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
  # Always hand back a bool, like the non-wildcard paths above, instead of
  # leaking the regex match object (or None) to the caller.
  return pat.match(hostname) is not None
88
89
def _MatchHostname(cert, hostname):
  """Checks that a decoded certificate is valid for the given hostname.

  The dNSName entries of subjectAltName are tried first. The commonName
  entries of the subject are consulted only if the certificate carries no
  dNSName entry at all. IP addresses are not accepted for hostname.

  Args:
    cert: The certificate in decoded format, as returned by
        SSLSocket.getpeercert().
    hostname: The hostname the certificate is expected to match.

  Returns:
    None when a matching name is found.

  Raises:
    ValueError: cert is empty or None.
    CertificateError: No name in the certificate matches hostname.
  """
  if not cert:
    raise ValueError('empty or no certificate, match_hostname needs a '
                     'SSL socket or SSL context with either '
                     'CERT_OPTIONAL or CERT_REQUIRED')

  # Names that were tried and did not match; used for the error message.
  candidates = []

  for kind, name in cert.get('subjectAltName', ()):
    if kind != 'DNS':
      continue
    if _DNSNameMatch(name, hostname):
      return
    candidates.append(name)

  if not candidates:
    # Fall back to the subject only when subjectAltName has no dNSName.
    for rdn in cert.get('subject', ()):
      for kind, name in rdn:
        # XXX according to RFC 2818, the most specific Common Name
        # must be used.
        if kind != 'commonName':
          continue
        if _DNSNameMatch(name, hostname):
          return
        candidates.append(name)

  if not candidates:
    raise CertificateError('no appropriate commonName or '
                           'subjectAltName fields were found')
  if len(candidates) == 1:
    raise CertificateError('hostname %r doesn\'t match %r'
                           % (hostname, candidates[0]))
  raise CertificateError('hostname %r doesn\'t match either of %s'
                         % (hostname, ', '.join(map(repr, candidates))))
129
130
class HTTPSConnection(httplib.HTTPSConnection):
  """An HTTPS connection that verifies the server certificate and hostname.

  Attributes:
    root_certs: Path of the file passed to ssl.wrap_socket as ca_certs. If
        None, the socket is wrapped without requesting certificate
        validation.
  """

  def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    self.root_certs = root_certs
    httplib.HTTPSConnection.__init__(self, host, **kwargs)

  def connect(self):
    """Connects, wraps the socket with SSL and verifies the peer hostname.

    Raises:
      CertificateError: The peer certificate does not match the hostname.
      ValueError: The peer certificate is empty.
    """
    # Overrides for certificate verification.
    args = [(self.host, self.port), self.timeout]
    if self.source_address:
      args.append(self.source_address)
    sock = socket.create_connection(*args)

    if self._tunnel_host:
      self.sock = sock
      self._tunnel()

    # Wrap the socket for verification with the root certs.
    kwargs = {}
    if self.root_certs is not None:
      kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
    self.sock = ssl.wrap_socket(sock, **kwargs)

    # When tunnelling through a proxy, self.host is the proxy we connected to,
    # while the SSL peer is the tunnel destination. The certificate has to be
    # checked against the latter, as httplib itself does for server_hostname.
    hostname = self._tunnel_host or self.host

    # Check hostname.
    try:
      _MatchHostname(self.sock.getpeercert(), hostname)
    except ValueError:
      # CertificateError derives from ValueError, so this also covers the
      # plain ValueError raised for an empty certificate. Either way the
      # socket must not be left open.
      self.sock.shutdown(socket.SHUT_RDWR)
      self.sock.close()
      raise
161
162
class HTTPSHandler(urllib2.HTTPSHandler):
  """A urllib2 https handler that opens connections via HTTPSConnection.

  Attributes:
    root_certs: Value handed to every HTTPSConnection as root_certs.
  """

  def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    urllib2.HTTPSHandler.__init__(self)
    self.root_certs = root_certs

  def https_open(self, req):
    """Opens req using connections created by GetConnection."""
    # do_open receives the factory rather than a connection class, which is
    # how verification against the trusted root certs gets injected.
    connection_factory = self.GetConnection
    return self.do_open(connection_factory, req)

  def GetConnection(self, host, **kwargs):
    """Creates an HTTPSConnection to host.

    An explicit root_certs in kwargs takes precedence over self.root_certs.
    """
    kwargs.setdefault('root_certs', self.root_certs)
    return HTTPSConnection(host, **kwargs)
178
179
def _SendRequest(url, timeout=None):
  """Sends a request to the given url, and returns the server response.

  For https urls the connection goes through HTTPSHandler. If the environment
  variable COOKIE_FILE names an existing file, its cookies are loaded and
  made available to the request.

  Args:
    url: The url to send the request to.
    timeout: Timeout passed on to the opener's open() call.

  Returns:
    A tuple (status_code, content):
      status_code: The http code of the response, -1 if the request failed
          with an SSL, bad-status-line or I/O error, or None if url is empty.
      content: The body of the response as a string, or None if the request
          did not succeed.

  Raises:
    CertificateError: Certificate verification fails.
  """
  if not url:
    return None, None

  handlers = []
  if url.startswith('https://'):
    # HTTPSHandler has to go first, because we don't want to send secure cookies
    # to a man in the middle.
    handlers.append(HTTPSHandler())

  cookie_file = os.environ.get('COOKIE_FILE')
  if cookie_file and os.path.exists(cookie_file):
    cookie_jar = cookielib.MozillaCookieJar(cookie_file)
    try:
      # Constructing the jar only records the file name; the cookies are not
      # read from disk until load() is called.
      cookie_jar.load()
    except IOError:
      # Best effort (cookielib.LoadError derives from IOError): an unreadable
      # or malformed cookie file should not prevent the request itself.
      pass
    handlers.append(urllib2.HTTPCookieProcessor(cookie_jar))

  url_opener = urllib2.build_opener(*handlers)

  try:
    response = url_opener.open(url, timeout=timeout)
    try:
      return response.code, response.read()
    finally:
      # Release the underlying connection instead of waiting for garbage
      # collection.
      response.close()
  except urllib2.HTTPError as e:
    return e.code, None
  except (ssl.SSLError, httplib.BadStatusLine, IOError):
    return -1, None
226
227
class HttpClientLocal(http_client.HttpClient):
  """This http client is used locally in a workstation, GCE VMs, etc."""

  @staticmethod
  def Get(url, params=None, timeout=120, retries=5, retry_interval=0.5,
          retry_if_not=None):
    """Sends a GET request to the given url, retrying on failure.

    Args:
      url: The url to send the request to.
      params: Optional query parameters in a form accepted by
          urllib.urlencode; they are appended to url.
      timeout: Timeout passed on to each request.
      retries: Maximum number of attempts. At least one attempt is always
          made.
      retry_interval: Seconds to sleep between two attempts.
      retry_if_not: If set, a response with this status code is returned
          right away without any further retry.

    Returns:
      The (status_code, content) tuple of the last attempt.
    """
    if params:
      # Do not add a second '?' if the url already carries a query string.
      separator = '&' if '?' in url else '?'
      url = '%s%s%s' % (url, separator, urllib.urlencode(params))

    attempts = 0
    while True:
      attempts += 1

      status_code, content = _SendRequest(url, timeout=timeout)
      if status_code == 200:
        return status_code, content
      if retry_if_not and status_code == retry_if_not:
        return status_code, content

      if attempts >= retries:
        # Out of attempts: report the last failure to the caller.
        return status_code, content
      time.sleep(retry_interval)
254