# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
A http client with support for https connections with certificate verification.

The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
and the code is from Lib/ssl.py in python3:
  http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py

One use case is to download Chromium DEPS file in a secure way:
  https://src.chromium.org/chrome/trunk/src/DEPS

Notice: python 2.7 or newer is required.
"""

import cookielib
import httplib
import os
import re
import socket
import ssl
import time
import urllib
import urllib2

import http_client


_SCRIPT_DIR = os.path.dirname(__file__)
_TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')


class CertificateError(ValueError):
  """Raised when a certificate does not match the requested hostname."""


def _CloseSocketQuietly(sock):
  """Shut down and close |sock|, ignoring errors from the shutdown step.

  This is used on failure paths only, where a secondary socket error must not
  replace the exception that is already propagating.
  """
  try:
    sock.shutdown(socket.SHUT_RDWR)
  except socket.error:
    # The peer may already have dropped the connection, or the socket may
    # already be closed; either way there is nothing left to shut down.
    pass
  sock.close()


def _DNSNameMatch(dn, hostname, max_wildcards=1):
  """Matching according to RFC 6125, section 6.4.3

  http://tools.ietf.org/html/rfc6125#section-6.4.3

  Args:
    dn: The DNS name presented by the certificate, possibly with a wildcard in
        its left-most label.
    hostname: The hostname the client intended to connect to.
    max_wildcards: The maximum number of '*' allowed in the left-most label.

  Returns:
    True if |hostname| matches |dn|, otherwise False.

  Raises:
    CertificateError: The left-most label has more than |max_wildcards| '*'.
  """
  if not dn:
    return False

  parts = dn.split(r'.')
  leftmost = parts[0]
  remainder = parts[1:]

  wildcards = leftmost.count('*')
  if wildcards > max_wildcards:
    # Issue #17980: avoid denials of service by refusing more
    # than one wildcard per fragment.  A survey of established
    # policy among SSL implementations showed it to be a
    # reasonable choice.
    raise CertificateError(
        'too many wildcards in certificate DNS name: ' + repr(dn))

  # speed up common case w/o wildcards
  if not wildcards:
    return dn.lower() == hostname.lower()

  pats = []

  # RFC 6125, section 6.4.3, subitem 1.
  # The client SHOULD NOT attempt to match a presented identifier in which
  # the wildcard character comprises a label other than the left-most label.
  if leftmost == '*':
    # When '*' is a fragment by itself, it matches a non-empty dotless
    # fragment.
    pats.append('[^.]+')
  elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
    # RFC 6125, section 6.4.3, subitem 3.
    # The client SHOULD NOT attempt to match a presented identifier
    # where the wildcard character is embedded within an A-label or
    # U-label of an internationalized domain name.
    pats.append(re.escape(leftmost))
  else:
    # Otherwise, '*' matches any dotless string, e.g. www*
    pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))

  # add the remaining fragments, ignore any wildcards
  for frag in remainder:
    pats.append(re.escape(frag))

  pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
  # Return a plain bool so that both return paths have the same type.
  return pat.match(hostname) is not None


def _MatchHostname(cert, hostname):
  """Verify that *cert* (in decoded format as returned by
  SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
  rules are followed, but IP addresses are not accepted for *hostname*.

  CertificateError is raised on failure. On success, the function
  returns nothing.

  Args:
    cert: The decoded certificate dict.
    hostname: The hostname the certificate is expected to be valid for.

  Raises:
    ValueError: |cert| is empty or None.
    CertificateError: No dNSName/commonName entry matches |hostname|.
  """
  if not cert:
    raise ValueError('empty or no certificate, match_hostname needs a '
                     'SSL socket or SSL context with either '
                     'CERT_OPTIONAL or CERT_REQUIRED')
  dnsnames = []
  san = cert.get('subjectAltName', ())
  for key, value in san:
    if key == 'DNS':
      if _DNSNameMatch(value, hostname):
        return
      dnsnames.append(value)
  if not dnsnames:
    # The subject is only checked when there is no dNSName entry
    # in subjectAltName
    for sub in cert.get('subject', ()):
      for key, value in sub:
        # XXX according to RFC 2818, the most specific Common Name
        # must be used.
        if key == 'commonName':
          if _DNSNameMatch(value, hostname):
            return
          dnsnames.append(value)
  if len(dnsnames) > 1:
    raise CertificateError('hostname %r doesn\'t match either of %s'
                           % (hostname, ', '.join(map(repr, dnsnames))))
  elif len(dnsnames) == 1:
    raise CertificateError('hostname %r doesn\'t match %r'
                           % (hostname, dnsnames[0]))
  else:
    raise CertificateError('no appropriate commonName or '
                           'subjectAltName fields were found')


class HTTPSConnection(httplib.HTTPSConnection):
  """An HTTPS connection that verifies the server certificate and hostname."""

  def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    """Initializes the connection.

    Args:
      host: The host to connect to.
      root_certs: Path to the CA bundle used to verify the server certificate.
      **kwargs: Passed through to httplib.HTTPSConnection.
    """
    self.root_certs = root_certs
    httplib.HTTPSConnection.__init__(self, host, **kwargs)

  def connect(self):
    """Connects to the server and verifies its certificate and hostname.

    Raises:
      CertificateError: The certificate does not match the host.
      ValueError: The peer certificate is empty or missing.
    """
    # Overrides for certificate verification.
    args = [(self.host, self.port), self.timeout]
    if self.source_address:
      args.append(self.source_address)
    sock = socket.create_connection(*args)

    verified = False
    try:
      if self._tunnel_host:
        self.sock = sock
        self._tunnel()

      # Wrap the socket for verification with the root certs.
      kwargs = {}
      if self.root_certs is not None:
        kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
      self.sock = ssl.wrap_socket(sock, **kwargs)

      # Check hostname.
      # NOTE(review): when tunnelling through a proxy, confirm that self.host
      # names the target server rather than the proxy on the Python version in
      # use.
      _MatchHostname(self.sock.getpeercert(), self.host)
      verified = True
    finally:
      if not verified:
        # Any failure (tunnel setup, TLS handshake, certificate checks) must
        # release the sockets instead of leaking them to the garbage
        # collector.
        if self.sock is not None and self.sock is not sock:
          _CloseSocketQuietly(self.sock)
        _CloseSocketQuietly(sock)
        self.sock = None


class HTTPSHandler(urllib2.HTTPSHandler):
  """A urllib2 handler that opens https urls through HTTPSConnection."""

  def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    """Initializes the handler.

    Args:
      root_certs: Path to the CA bundle handed to every HTTPSConnection.
    """
    urllib2.HTTPSHandler.__init__(self)
    self.root_certs = root_certs

  def https_open(self, req):
    # Pass a reference to the function below so that verification against
    # trusted root certs could be injected.
    return self.do_open(self.GetConnection, req)

  def GetConnection(self, host, **kwargs):
    """Returns an HTTPSConnection to |host| using this handler's root certs.

    A 'root_certs' entry in |kwargs| overrides the handler's own value.
    """
    params = dict(root_certs=self.root_certs)
    params.update(kwargs)
    return HTTPSConnection(host, **params)


def _SendRequest(url, timeout=None):
  """Send request to the given https url, and return the server response.

  Args:
    url: The https url to send request to.
    timeout: Timeout passed to the url opener; None leaves the default.

  Returns:
    An integer: http code of the response. It is None if |url| is empty and -1
        if the request fails with an SSL, bad-status-line or IO error.
    A string: content of the response. It is None unless the request succeeds.

  Raises:
    CertificateError: Certificate verification fails.
  """
  if not url:
    return None, None

  handlers = []
  if url.startswith('https://'):
    # HTTPSHandler has to go first, because we don't want to send secure cookies
    # to a man in the middle.
    handlers.append(HTTPSHandler())

  cookie_file = os.environ.get('COOKIE_FILE')
  if cookie_file and os.path.exists(cookie_file):
    cookie_jar = cookielib.MozillaCookieJar(cookie_file)
    try:
      # The constructor only records the file name; the cookies are read from
      # disk by load().
      cookie_jar.load()
    except IOError:
      # cookielib.LoadError is an IOError. An unreadable or malformed cookie
      # file should not prevent the request from being sent without cookies.
      pass
    handlers.append(urllib2.HTTPCookieProcessor(cookie_jar))

  url_opener = urllib2.build_opener(*handlers)

  try:
    response = url_opener.open(url, timeout=timeout)
    try:
      return response.code, response.read()
    finally:
      # Release the underlying connection even if reading the body fails.
      response.close()
  except urllib2.HTTPError as e:
    return e.code, None
  except (ssl.SSLError, httplib.BadStatusLine, IOError):
    return -1, None


class HttpClientLocal(http_client.HttpClient):
  """This http client is used locally in a workstation, GCE VMs, etc."""

  @staticmethod
  def Get(url, params=None, timeout=120, retries=5, retry_interval=0.5,
          retry_if_not=None):
    """Sends a GET request, retrying on failure.

    Args:
      url: The url to send the request to.
      params: An optional dict of query parameters to append to |url|.
      timeout: Timeout of each individual request.
      retries: The maximum number of attempts; at least one is always made.
      retry_interval: Seconds to sleep between two attempts.
      retry_if_not: If set, a response with exactly this status code is
          returned right away instead of being retried.

    Returns:
      A (status_code, content) tuple as returned by the last attempt.
    """
    if params:
      # Extend an existing query string instead of adding a second '?'.
      separator = '&' if '?' in url else '?'
      url = '%s%s%s' % (url, separator, urllib.urlencode(params))

    attempt = 0
    while True:
      attempt += 1

      status_code, content = _SendRequest(url, timeout=timeout)
      if status_code == 200:
        return status_code, content
      if retry_if_not and status_code == retry_if_not:
        return status_code, content

      if attempt >= retries:
        return status_code, content
      time.sleep(retry_interval)