# Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Utilities for parsing and formatting headers that follow the grammar defined
in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
"""


try:
    import urlparse
except ImportError:
    # Python 3 moved the urlparse module to urllib.parse. Bind it to the
    # same name so the rest of this module works unchanged.
    from urllib import parse as urlparse


# Characters listed as "separators" in the HTTP RFC; a token cannot contain
# any of them.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'


def _is_char(c):
    """Returns true iff c is in CHAR as specified in HTTP RFC."""

    return ord(c) <= 127


def _is_ctl(c):
    """Returns true iff c is in CTL as specified in HTTP RFC."""

    return ord(c) <= 31 or ord(c) == 127


class ParsingState(object):
    """Cursor over a string being parsed.

    Attributes:
        data: the complete string being parsed.
        head: index into data of the next unconsumed character.
    """

    def __init__(self, data):
        self.data = data
        self.head = 0


def peek(state, pos=0):
    """Peeks the character at pos from the head of data.

    Returns None when that position is at or beyond the end of data. The
    head is not moved.
    """

    index = state.head + pos
    if index >= len(state.data):
        return None

    return state.data[index]


def consume(state, amount=1):
    """Consumes specified amount of bytes from the head and returns the
    consumed bytes. If there's not enough bytes to consume, returns None
    and leaves the head where it was.
    """

    end = state.head + amount
    if end > len(state.data):
        return None

    result = state.data[state.head:end]
    state.head = end
    return result


def consume_string(state, expected):
    """Given a parsing state and a expected string, consumes the string from
    the head. Returns True if consumed successfully. Otherwise, returns
    False and leaves the head where it was.
    """

    # Verify the whole string first so that a partial match consumes nothing.
    for pos, c in enumerate(expected):
        if c != peek(state, pos):
            return False

    consume(state, len(expected))
    return True


def consume_lws(state):
    """Consumes a LWS from the head. Returns True if any LWS is consumed.
    Otherwise, returns False and leaves the head where it was.

    LWS = [CRLF] 1*( SP | HT )
    """

    original_head = state.head

    # The leading CRLF is optional, so the result is deliberately ignored.
    consume_string(state, '\r\n')

    pos = 0
    while peek(state, pos) in (' ', '\t'):
        pos += 1

    if pos == 0:
        # A CRLF that is not followed by SP or HT is not LWS; give it back.
        state.head = original_head
        return False

    consume(state, pos)
    return True


def consume_lwses(state):
    """Consumes *LWS from the head."""

    while consume_lws(state):
        pass


def consume_token(state):
    """Consumes a token from the head. Returns the token or None if no token
    was found.

    The token ends at the end of data or at the first character that is a
    separator, a CTL, or not a CHAR.
    """

    pos = 0

    while True:
        c = peek(state, pos)
        if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            break
        pos += 1

    if pos == 0:
        return None

    return consume(state, pos)


def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string, and returns the token or unquoted
    string. If no token or quoted-string was found, returns None.

    For a quoted-string, the surrounding double quotes are dropped, each
    quoted-pair is replaced by the character it escapes, and each LWS is
    replaced by a single space. When the quoted-string is malformed, the
    head is restored to where it was on entry.
    """

    original_head = state.head

    if not consume_string(state, '"'):
        return consume_token(state)

    result = []

    expect_quoted_pair = False

    while True:
        # LWS is only folded outside of a quoted-pair; right after a
        # backslash the next character must be taken literally.
        if not expect_quoted_pair and consume_lws(state):
            result.append(' ')
            continue

        c = consume(state)
        if c is None:
            # quoted-string is not enclosed with double quotation
            state.head = original_head
            return None
        elif expect_quoted_pair:
            expect_quoted_pair = False
            if _is_char(c):
                result.append(c)
            else:
                # Non CHAR character found in quoted-pair
                state.head = original_head
                return None
        elif c == '\\':
            expect_quoted_pair = True
        elif c == '"':
            return ''.join(result)
        elif _is_ctl(c):
            # Invalid (CTL) character found in qdtext
            state.head = original_head
            return None
        else:
            result.append(c)


def quote_if_necessary(s):
    """Quotes arbitrary string into quoted-string.

    Returns s unchanged when every character in it is allowed in a token.
    Otherwise returns s wrapped in double quotes, with each double quote,
    backslash and CTL preceded by a backslash. An empty string is returned
    as '""'.
    """

    if s == '':
        return '""'

    quote = False
    result = []
    for c in s:
        # '"' and '\\' are both in _SEPARATORS, so they force quoting too.
        if c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            quote = True

        # Backslash must be escaped as well: consume_token_or_quoted_string
        # treats a bare backslash as the start of a quoted-pair and would
        # otherwise drop it when reading the value back.
        if c == '"' or c == '\\' or _is_ctl(c):
            result.append('\\' + c)
        else:
            result.append(c)

    if quote:
        return '"' + ''.join(result) + '"'
    else:
        return ''.join(result)


def parse_uri(uri):
    """Parse absolute URI then return host, port and resource.

    Returns a (host, port, resource) tuple:
        - When the scheme is neither 'ws' nor 'wss', uri is taken as a
          relative URI and (None, None, uri) is returned.
        - When there is no host, or reading the port raises ValueError,
          (None, None, None) is returned.
        - Otherwise port defaults to 80 for 'ws' and 443 for 'wss', and
          resource is the path ('/' if empty) followed by '?query' and
          '#fragment' when those are present.
    """

    parsed = urlparse.urlsplit(uri)
    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    port = None
    try:
        port = parsed.port
    except ValueError:
        # port property cause ValueError on invalid null port description like
        # 'ws://host:/path'.
        return None, None, None

    if port is None:
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path


# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries in uses_netloc.
# Register each scheme independently so that neither is missed nor duplicated.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)


# vi:sts=4 sw=4 et