1// Copyright 2006-2008 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28// This file contains support for URI manipulations written in 29// JavaScript. 30 31// Expect $String = global.String; 32 33// Lazily initialized. 34var hexCharArray = 0; 35var hexCharCodeArray = 0; 36 37 38function URIAddEncodedOctetToBuffer(octet, result, index) { 39 result[index++] = 37; // Char code of '%'. 40 result[index++] = hexCharCodeArray[octet >> 4]; 41 result[index++] = hexCharCodeArray[octet & 0x0F]; 42 return index; 43} 44 45 46function URIEncodeOctets(octets, result, index) { 47 if (hexCharCodeArray === 0) { 48 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 49 65, 66, 67, 68, 69, 70]; 50 } 51 index = URIAddEncodedOctetToBuffer(octets[0], result, index); 52 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index); 53 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index); 54 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index); 55 return index; 56} 57 58 59function URIEncodeSingle(cc, result, index) { 60 var x = (cc >> 12) & 0xF; 61 var y = (cc >> 6) & 63; 62 var z = cc & 63; 63 var octets = new $Array(3); 64 if (cc <= 0x007F) { 65 octets[0] = cc; 66 } else if (cc <= 0x07FF) { 67 octets[0] = y + 192; 68 octets[1] = z + 128; 69 } else { 70 octets[0] = x + 224; 71 octets[1] = y + 128; 72 octets[2] = z + 128; 73 } 74 return URIEncodeOctets(octets, result, index); 75} 76 77 78function URIEncodePair(cc1 , cc2, result, index) { 79 var u = ((cc1 >> 6) & 0xF) + 1; 80 var w = (cc1 >> 2) & 0xF; 81 var x = cc1 & 3; 82 var y = (cc2 >> 6) & 0xF; 83 var z = cc2 & 63; 84 var octets = new $Array(4); 85 octets[0] = (u >> 2) + 240; 86 octets[1] = (((u & 3) << 4) | w) + 128; 87 octets[2] = ((x << 4) | y) + 128; 88 octets[3] = z + 128; 89 return URIEncodeOctets(octets, result, index); 90} 91 92 93function URIHexCharsToCharCode(ch1, ch2) { 94 if (HexValueOf(ch1) == -1 || HexValueOf(ch2) == -1) { 95 throw new $URIError("URI malformed"); 96 } 97 return HexStrToCharCode(ch1 + ch2); 98} 99 100 101function URIDecodeOctets(octets, result, index) { 102 var value; 103 var o0 = octets[0]; 104 if (o0 < 0x80) { 105 value = o0; 106 } else if (o0 < 0xc2) { 107 throw new $URIError("URI malformed"); 108 } else { 109 var o1 = octets[1]; 110 if (o0 < 0xe0) { 111 var a = o0 & 0x1f; 112 if ((o1 < 0x80) || (o1 > 0xbf)) 113 throw new $URIError("URI malformed"); 114 var b = o1 & 0x3f; 115 value = (a << 6) + b; 116 if (value < 0x80 || value > 0x7ff) 117 throw new $URIError("URI malformed"); 118 } else { 119 var o2 = octets[2]; 120 if (o0 < 0xf0) { 121 var a = o0 & 0x0f; 122 if ((o1 < 0x80) || (o1 > 0xbf)) 123 throw new $URIError("URI malformed"); 124 var b = o1 & 0x3f; 125 if ((o2 < 0x80) || (o2 > 0xbf)) 126 throw new $URIError("URI malformed"); 127 var c = o2 & 0x3f; 128 value = (a << 12) + (b << 6) + c; 129 if ((value < 0x800) || (value > 0xffff)) 130 throw new $URIError("URI malformed"); 131 } else { 132 var o3 = octets[3]; 133 if (o0 < 0xf8) { 134 var a = (o0 & 0x07); 135 if ((o1 < 0x80) || (o1 > 0xbf)) 136 throw new $URIError("URI malformed"); 137 var b = (o1 & 0x3f); 138 if ((o2 < 0x80) || (o2 > 0xbf)) 139 throw new $URIError("URI malformed"); 140 var c = (o2 & 0x3f); 141 if ((o3 < 0x80) || (o3 > 0xbf)) 142 throw new $URIError("URI malformed"); 143 var d = (o3 & 0x3f); 144 value = (a << 18) + (b << 12) + (c << 6) + d; 145 if ((value < 0x10000) || (value > 0x10ffff)) 146 throw new $URIError("URI malformed"); 147 } else { 148 throw new $URIError("URI malformed"); 149 } 150 } 151 } 152 } 153 if (value < 0x10000) { 154 result[index++] = value; 155 return index; 156 } else { 157 result[index++] = (value >> 10) + 0xd7c0; 158 result[index++] = (value & 0x3ff) + 0xdc00; 159 return index; 160 } 161} 162 163 164// ECMA-262, section 15.1.3 165function Encode(uri, unescape) { 166 var uriLength = uri.length; 167 var result = new $Array(uriLength); 168 var index = 0; 169 for (var k = 0; k < uriLength; k++) { 170 var cc1 = uri.charCodeAt(k); 171 if (unescape(cc1)) { 172 result[index++] = cc1; 173 } else { 174 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed"); 175 if (cc1 < 0xD800 || cc1 > 0xDBFF) { 176 index = URIEncodeSingle(cc1, result, index); 177 } else { 178 k++; 179 if (k == uriLength) throw new $URIError("URI malformed"); 180 var cc2 = uri.charCodeAt(k); 181 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed"); 182 index = URIEncodePair(cc1, cc2, result, index); 183 } 184 } 185 } 186 return %StringFromCharCodeArray(result); 187} 188 189 190// ECMA-262, section 15.1.3 191function Decode(uri, reserved) { 192 var uriLength = uri.length; 193 var result = new $Array(uriLength); 194 var index = 0; 195 for (var k = 0; k < uriLength; k++) { 196 var ch = uri.charAt(k); 197 if (ch == '%') { 198 if (k + 2 >= uriLength) throw new $URIError("URI malformed"); 199 var cc = URIHexCharsToCharCode(uri.charAt(++k), uri.charAt(++k)); 200 if (cc >> 7) { 201 var n = 0; 202 while (((cc << ++n) & 0x80) != 0) ; 203 if (n == 1 || n > 4) throw new $URIError("URI malformed"); 204 var octets = new $Array(n); 205 octets[0] = cc; 206 if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed"); 207 for (var i = 1; i < n; i++) { 208 k++; 209 octets[i] = URIHexCharsToCharCode(uri.charAt(++k), uri.charAt(++k)); 210 } 211 index = URIDecodeOctets(octets, result, index); 212 } else { 213 if (reserved(cc)) { 214 result[index++] = 37; // Char code of '%'. 215 result[index++] = uri.charCodeAt(k - 1); 216 result[index++] = uri.charCodeAt(k); 217 } else { 218 result[index++] = cc; 219 } 220 } 221 } else { 222 result[index++] = ch.charCodeAt(0); 223 } 224 } 225 result.length = index; 226 return %StringFromCharCodeArray(result); 227} 228 229 230// ECMA-262 - 15.1.3.1. 231function URIDecode(uri) { 232 function reservedPredicate(cc) { 233 // #$ 234 if (35 <= cc && cc <= 36) return true; 235 // & 236 if (cc == 38) return true; 237 // +, 238 if (43 <= cc && cc <= 44) return true; 239 // / 240 if (cc == 47) return true; 241 // :; 242 if (58 <= cc && cc <= 59) return true; 243 // = 244 if (cc == 61) return true; 245 // ?@ 246 if (63 <= cc && cc <= 64) return true; 247 248 return false; 249 }; 250 var string = ToString(uri); 251 return Decode(string, reservedPredicate); 252} 253 254 255// ECMA-262 - 15.1.3.2. 256function URIDecodeComponent(component) { 257 function reservedPredicate(cc) { return false; }; 258 var string = ToString(component); 259 return Decode(string, reservedPredicate); 260} 261 262 263// Does the char code correspond to an alpha-numeric char. 264function isAlphaNumeric(cc) { 265 // a - z 266 if (97 <= cc && cc <= 122) return true; 267 // A - Z 268 if (65 <= cc && cc <= 90) return true; 269 // 0 - 9 270 if (48 <= cc && cc <= 57) return true; 271 272 return false; 273} 274 275 276// ECMA-262 - 15.1.3.3. 277function URIEncode(uri) { 278 function unescapePredicate(cc) { 279 if (isAlphaNumeric(cc)) return true; 280 // ! 281 if (cc == 33) return true; 282 // #$ 283 if (35 <= cc && cc <= 36) return true; 284 // &'()*+,-./ 285 if (38 <= cc && cc <= 47) return true; 286 // :; 287 if (58 <= cc && cc <= 59) return true; 288 // = 289 if (cc == 61) return true; 290 // ?@ 291 if (63 <= cc && cc <= 64) return true; 292 // _ 293 if (cc == 95) return true; 294 // ~ 295 if (cc == 126) return true; 296 297 return false; 298 }; 299 300 var string = ToString(uri); 301 return Encode(string, unescapePredicate); 302} 303 304 305// ECMA-262 - 15.1.3.4 306function URIEncodeComponent(component) { 307 function unescapePredicate(cc) { 308 if (isAlphaNumeric(cc)) return true; 309 // ! 310 if (cc == 33) return true; 311 // '()* 312 if (39 <= cc && cc <= 42) return true; 313 // -. 314 if (45 <= cc && cc <= 46) return true; 315 // _ 316 if (cc == 95) return true; 317 // ~ 318 if (cc == 126) return true; 319 320 return false; 321 }; 322 323 var string = ToString(component); 324 return Encode(string, unescapePredicate); 325} 326 327 328function HexValueOf(c) { 329 var code = c.charCodeAt(0); 330 331 // 0-9 332 if (code >= 48 && code <= 57) return code - 48; 333 // A-F 334 if (code >= 65 && code <= 70) return code - 55; 335 // a-f 336 if (code >= 97 && code <= 102) return code - 87; 337 338 return -1; 339} 340 341 342// Convert a character code to 4-digit hex string representation 343// 64 -> 0040, 62234 -> F31A. 344function CharCodeToHex4Str(cc) { 345 var r = ""; 346 if (hexCharArray === 0) { 347 hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 348 "A", "B", "C", "D", "E", "F"]; 349 } 350 for (var i = 0; i < 4; ++i) { 351 var c = hexCharArray[cc & 0x0F]; 352 r = c + r; 353 cc = cc >>> 4; 354 } 355 return r; 356} 357 358 359// Converts hex string to char code. Not efficient. 360function HexStrToCharCode(s) { 361 var m = 0; 362 var r = 0; 363 for (var i = s.length - 1; i >= 0; --i) { 364 r = r + (HexValueOf(s.charAt(i)) << m); 365 m = m + 4; 366 } 367 return r; 368} 369 370 371// Returns true if all digits in string s are valid hex numbers 372function IsValidHex(s) { 373 for (var i = 0; i < s.length; ++i) { 374 var cc = s.charCodeAt(i); 375 if ((48 <= cc && cc <= 57) || (65 <= cc && cc <= 70) || (97 <= cc && cc <= 102)) { 376 // '0'..'9', 'A'..'F' and 'a' .. 'f'. 377 } else { 378 return false; 379 } 380 } 381 return true; 382} 383 384 385// ECMA-262 - B.2.1. 386function URIEscape(str) { 387 var s = ToString(str); 388 return %URIEscape(s); 389} 390 391 392// ECMA-262 - B.2.2. 393function URIUnescape(str) { 394 var s = ToString(str); 395 return %URIUnescape(s); 396} 397 398 399// ------------------------------------------------------------------- 400 401function SetupURI() { 402 // Setup non-enumerable URI functions on the global object and set 403 // their names. 404 InstallFunctions(global, DONT_ENUM, $Array( 405 "escape", URIEscape, 406 "unescape", URIUnescape, 407 "decodeURI", URIDecode, 408 "decodeURIComponent", URIDecodeComponent, 409 "encodeURI", URIEncode, 410 "encodeURIComponent", URIEncodeComponent 411 )); 412} 413 414SetupURI(); 415 416