1// Copyright 2006-2008 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5"use strict"; 6 7// This file relies on the fact that the following declaration has been made 8// in runtime.js: 9// var $Array = global.Array; 10 11// ------------------------------------------------------------------- 12 13// This file contains support for URI manipulations written in 14// JavaScript. 15 16 17(function() { 18 19 // ------------------------------------------------------------------- 20 // Define internal helper functions. 21 22 function HexValueOf(code) { 23 // 0-9 24 if (code >= 48 && code <= 57) return code - 48; 25 // A-F 26 if (code >= 65 && code <= 70) return code - 55; 27 // a-f 28 if (code >= 97 && code <= 102) return code - 87; 29 30 return -1; 31 } 32 33 // Does the char code correspond to an alpha-numeric char. 34 function isAlphaNumeric(cc) { 35 // a - z 36 if (97 <= cc && cc <= 122) return true; 37 // A - Z 38 if (65 <= cc && cc <= 90) return true; 39 // 0 - 9 40 if (48 <= cc && cc <= 57) return true; 41 42 return false; 43 } 44 45 //Lazily initialized. 46 var hexCharCodeArray = 0; 47 48 function URIAddEncodedOctetToBuffer(octet, result, index) { 49 result[index++] = 37; // Char code of '%'. 
50 result[index++] = hexCharCodeArray[octet >> 4]; 51 result[index++] = hexCharCodeArray[octet & 0x0F]; 52 return index; 53 } 54 55 function URIEncodeOctets(octets, result, index) { 56 if (hexCharCodeArray === 0) { 57 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 65, 66, 67, 68, 69, 70]; 59 } 60 index = URIAddEncodedOctetToBuffer(octets[0], result, index); 61 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index); 62 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index); 63 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index); 64 return index; 65 } 66 67 function URIEncodeSingle(cc, result, index) { 68 var x = (cc >> 12) & 0xF; 69 var y = (cc >> 6) & 63; 70 var z = cc & 63; 71 var octets = new $Array(3); 72 if (cc <= 0x007F) { 73 octets[0] = cc; 74 } else if (cc <= 0x07FF) { 75 octets[0] = y + 192; 76 octets[1] = z + 128; 77 } else { 78 octets[0] = x + 224; 79 octets[1] = y + 128; 80 octets[2] = z + 128; 81 } 82 return URIEncodeOctets(octets, result, index); 83 } 84 85 function URIEncodePair(cc1 , cc2, result, index) { 86 var u = ((cc1 >> 6) & 0xF) + 1; 87 var w = (cc1 >> 2) & 0xF; 88 var x = cc1 & 3; 89 var y = (cc2 >> 6) & 0xF; 90 var z = cc2 & 63; 91 var octets = new $Array(4); 92 octets[0] = (u >> 2) + 240; 93 octets[1] = (((u & 3) << 4) | w) + 128; 94 octets[2] = ((x << 4) | y) + 128; 95 octets[3] = z + 128; 96 return URIEncodeOctets(octets, result, index); 97 } 98 99 function URIHexCharsToCharCode(highChar, lowChar) { 100 var highCode = HexValueOf(highChar); 101 var lowCode = HexValueOf(lowChar); 102 if (highCode == -1 || lowCode == -1) { 103 throw new $URIError("URI malformed"); 104 } 105 return (highCode << 4) | lowCode; 106 } 107 108 // Callers must ensure that |result| is a sufficiently long sequential 109 // two-byte string! 
// Decodes the single UTF-8 sequence held in |octets| (one to four octets,
// lead octet first) and stores the resulting UTF-16 code unit(s) into
// |result| starting at |index|. Returns the index just past what was
// written: one code unit for values below 0x10000, a surrogate pair
// otherwise.
//
// Throws $URIError("URI malformed") when
//  - the lead octet is in 0x80..0xc1 or is 0xf8 or above,
//  - a continuation octet is outside 0x80..0xbf,
//  - the decoded value is outside the range checked for its sequence length,
//  - the decoded value is a surrogate code point (0xD800..0xDFFF).
function URIDecodeOctets(octets, result, index) {
  var value;
  var o0 = octets[0];
  if (o0 < 0x80) {
    // Single octet: 0xxxxxxx.
    value = o0;
  } else if (o0 < 0xc2) {
    // 0x80..0xbf cannot start a sequence; 0xc0 and 0xc1 are rejected too.
    throw new $URIError("URI malformed");
  } else {
    var o1 = octets[1];
    if (o0 < 0xe0) {
      // Two octets: 110aaaaa 10bbbbbb.
      var a = o0 & 0x1f;
      if ((o1 < 0x80) || (o1 > 0xbf)) {
        throw new $URIError("URI malformed");
      }
      var b = o1 & 0x3f;
      value = (a << 6) + b;
      if (value < 0x80 || value > 0x7ff) {
        throw new $URIError("URI malformed");
      }
    } else {
      var o2 = octets[2];
      if (o0 < 0xf0) {
        // Three octets: 1110aaaa 10bbbbbb 10cccccc.
        var a = o0 & 0x0f;
        if ((o1 < 0x80) || (o1 > 0xbf)) {
          throw new $URIError("URI malformed");
        }
        var b = o1 & 0x3f;
        if ((o2 < 0x80) || (o2 > 0xbf)) {
          throw new $URIError("URI malformed");
        }
        var c = o2 & 0x3f;
        value = (a << 12) + (b << 6) + c;
        // Values below 0x800 would fit in a shorter sequence.
        if ((value < 0x800) || (value > 0xffff)) {
          throw new $URIError("URI malformed");
        }
      } else {
        var o3 = octets[3];
        if (o0 < 0xf8) {
          // Four octets: 11110aaa 10bbbbbb 10cccccc 10dddddd.
          var a = (o0 & 0x07);
          if ((o1 < 0x80) || (o1 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var b = (o1 & 0x3f);
          if ((o2 < 0x80) || (o2 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var c = (o2 & 0x3f);
          if ((o3 < 0x80) || (o3 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var d = (o3 & 0x3f);
          value = (a << 18) + (b << 12) + (c << 6) + d;
          // Values below 0x10000 would fit in a shorter sequence; values
          // above 0x10ffff are rejected.
          if ((value < 0x10000) || (value > 0x10ffff)) {
            throw new $URIError("URI malformed");
          }
        } else {
          // Lead octet 0xf8 or above.
          throw new $URIError("URI malformed");
        }
      }
    }
  }
  // Surrogate code points must not appear as decoded values.
  if (0xD800 <= value && value <= 0xDFFF) {
    throw new $URIError("URI malformed");
  }
  // NOTE(review): %_TwoByteSeqStringSetChar is an engine intrinsic not
  // defined in this file; presumably it stores a char code at the given
  // position of |result| - confirm against the runtime.
  if (value < 0x10000) {
    %_TwoByteSeqStringSetChar(index++, value, result);
  } else {
    // Split into a surrogate pair. 0xd7c0 equals 0xd800 - (0x10000 >> 10),
    // so the 0x10000 offset is subtracted as part of the same addition.
    %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
    %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
  }
  return index;
}

// ECMA-262, section 15.1.3
//
// Percent-encodes the string |uri|. |unescape| is a predicate on char codes:
// code units for which it returns a truthy value are copied through
// unchanged; all others are emitted as "%XY" escapes of their UTF-8 octets.
// Returns the encoded string.
//
// Throws $URIError("URI malformed") for a trail surrogate with no preceding
// lead surrogate, and for a lead surrogate that is last in the string or is
// not followed by a trail surrogate.
function Encode(uri, unescape) {
  var uriLength = uri.length;
  // Char codes of the output are collected here first; writes past the
  // initial length happen whenever at least one code unit gets escaped.
  var array = new InternalArray(uriLength);
  var index = 0;
  for (var k = 0; k < uriLength; k++) {
    var cc1 = uri.charCodeAt(k);
    if (unescape(cc1)) {
      array[index++] = cc1;
    } else {
      // Unpaired trail surrogate.
      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
        // Not a surrogate: encode this code unit on its own.
        index = URIEncodeSingle(cc1, array, index);
      } else {
        // Lead surrogate: consume the following code unit as well.
        k++;
        if (k == uriLength) throw new $URIError("URI malformed");
        var cc2 = uri.charCodeAt(k);
        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
        index = URIEncodePair(cc1, cc2, array, index);
      }
    }
  }

  // NOTE(review): %NewString and %_OneByteSeqStringSetChar are engine
  // intrinsics not defined in this file; presumably they allocate a string
  // of the given length and fill it one char code at a time - confirm.
  var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
  for (var i = 0; i < array.length; i++) {
    %_OneByteSeqStringSetChar(i, array[i], result);
  }
  return result;
}

// ECMA-262, section 15.1.3
//
// Decodes the "%XY" escapes in the string |uri|. |reserved| is a predicate
// on char codes: an escape that decodes to a char code below 0x80 for which
// it returns a truthy value is copied to the output in its original escaped
// form instead of being decoded.
//
// The work is done in two passes that share the input position |k|. The
// first pass writes into a one-byte string and stops at the first code unit
// or escaped octet above 0x7f; the second pass, if needed, writes the rest
// into a two-byte string. The result is the concatenation of both parts.
//
// Throws $URIError("URI malformed") for truncated escapes, non-hex digits
// (via URIHexCharsToCharCode) and invalid UTF-8 (here and in
// URIDecodeOctets).
function Decode(uri, reserved) {
  var uriLength = uri.length;
  // NOTE(review): %NewString, %_OneByteSeqStringSetChar,
  // %_TwoByteSeqStringSetChar and %TruncateString are engine intrinsics not
  // defined in this file; the descriptions below assume they allocate, fill
  // and shorten sequential strings - confirm against the runtime.
  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
  var index = 0;
  var k = 0;

  // Optimistically assume one-byte string.
  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
      // Two hex digits must follow the '%'.
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
      var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
      // |k| still points at the '%' here, so the second pass below starts by
      // re-reading this escape.
      if (cc >> 7) break;  // Assumption wrong, two-byte string.
      if (reserved(cc)) {
        // Keep the escape exactly as written in the input.
        %_OneByteSeqStringSetChar(index++, 37, one_byte);  // '%'.
        %_OneByteSeqStringSetChar(index++, uri.charCodeAt(k+1), one_byte);
        %_OneByteSeqStringSetChar(index++, uri.charCodeAt(k+2), one_byte);
      } else {
        %_OneByteSeqStringSetChar(index++, cc, one_byte);
      }
      // Skip the two hex digits; the loop increment skips the '%'.
      k += 2;
    } else {
      if (code > 0x7f) break;  // Assumption wrong, two-byte string.
      %_OneByteSeqStringSetChar(index++, code, one_byte);
    }
  }

  one_byte = %TruncateString(one_byte, index);
  if (k == uriLength) return one_byte;

  // Write into two byte string.
  // The output of this pass never has more code units than the remaining
  // input: every iteration below consumes at least as many input code units
  // as it writes.
  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
  index = 0;

  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
      // Arguments are evaluated left to right, so the two ++k read the high
      // and then the low hex digit and leave |k| on the low digit.
      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
      if (cc >> 7) {
        // Count the leading 1 bits of the octet; for a UTF-8 lead octet this
        // is the length of the sequence in octets.
        var n = 0;
        while (((cc << ++n) & 0x80) != 0) { }
        // n == 1 is a continuation octet in lead position; more than four
        // octets is not accepted.
        if (n == 1 || n > 4) throw new $URIError("URI malformed");
        var octets = new $Array(n);
        octets[0] = cc;
        // Each remaining octet needs a three-character "%XY" escape.
        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
        for (var i = 1; i < n; i++) {
          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
                                            uri.charCodeAt(++k));
        }
        index = URIDecodeOctets(octets, two_byte, index);
      } else if (reserved(cc)) {
        // Keep the escape exactly as written; |k| is on the low hex digit.
        %_TwoByteSeqStringSetChar(index++, 37, two_byte);  // '%'.
        %_TwoByteSeqStringSetChar(index++, uri.charCodeAt(k - 1), two_byte);
        %_TwoByteSeqStringSetChar(index++, uri.charCodeAt(k), two_byte);
      } else {
        %_TwoByteSeqStringSetChar(index++, cc, two_byte);
      }
    } else {
      // Unescaped code units are copied through unchanged.
      %_TwoByteSeqStringSetChar(index++, code, two_byte);
    }
  }

  two_byte = %TruncateString(two_byte, index);
  return one_byte + two_byte;
}

// -------------------------------------------------------------------
// Define exported functions.

// ECMA-262 - B.2.1.
// Converts |str| to a string and hands it to the %URIEscape runtime
// function, whose result is returned unchanged.
function URIEscapeJS(str) {
  var s = ToString(str);
  return %URIEscape(s);
}

// ECMA-262 - B.2.2.
// Converts |str| to a string and hands it to the %URIUnescape runtime
// function, whose result is returned unchanged.
function URIUnescapeJS(str) {
  var s = ToString(str);
  return %URIUnescape(s);
}

// ECMA-262 - 15.1.3.1.
298 function URIDecode(uri) { 299 var reservedPredicate = function(cc) { 300 // #$ 301 if (35 <= cc && cc <= 36) return true; 302 // & 303 if (cc == 38) return true; 304 // +, 305 if (43 <= cc && cc <= 44) return true; 306 // / 307 if (cc == 47) return true; 308 // :; 309 if (58 <= cc && cc <= 59) return true; 310 // = 311 if (cc == 61) return true; 312 // ?@ 313 if (63 <= cc && cc <= 64) return true; 314 315 return false; 316 }; 317 var string = ToString(uri); 318 return Decode(string, reservedPredicate); 319 } 320 321 // ECMA-262 - 15.1.3.2. 322 function URIDecodeComponent(component) { 323 var reservedPredicate = function(cc) { return false; }; 324 var string = ToString(component); 325 return Decode(string, reservedPredicate); 326 } 327 328 // ECMA-262 - 15.1.3.3. 329 function URIEncode(uri) { 330 var unescapePredicate = function(cc) { 331 if (isAlphaNumeric(cc)) return true; 332 // ! 333 if (cc == 33) return true; 334 // #$ 335 if (35 <= cc && cc <= 36) return true; 336 // &'()*+,-./ 337 if (38 <= cc && cc <= 47) return true; 338 // :; 339 if (58 <= cc && cc <= 59) return true; 340 // = 341 if (cc == 61) return true; 342 // ?@ 343 if (63 <= cc && cc <= 64) return true; 344 // _ 345 if (cc == 95) return true; 346 // ~ 347 if (cc == 126) return true; 348 349 return false; 350 }; 351 var string = ToString(uri); 352 return Encode(string, unescapePredicate); 353 } 354 355 // ECMA-262 - 15.1.3.4 356 function URIEncodeComponent(component) { 357 var unescapePredicate = function(cc) { 358 if (isAlphaNumeric(cc)) return true; 359 // ! 360 if (cc == 33) return true; 361 // '()* 362 if (39 <= cc && cc <= 42) return true; 363 // -. 
364 if (45 <= cc && cc <= 46) return true; 365 // _ 366 if (cc == 95) return true; 367 // ~ 368 if (cc == 126) return true; 369 370 return false; 371 }; 372 var string = ToString(component); 373 return Encode(string, unescapePredicate); 374 } 375 376 // ------------------------------------------------------------------- 377 // Install exported functions. 378 379 %CheckIsBootstrapping(); 380 381 // Set up non-enumerable URI functions on the global object and set 382 // their names. 383 InstallFunctions(global, DONT_ENUM, $Array( 384 "escape", URIEscapeJS, 385 "unescape", URIUnescapeJS, 386 "decodeURI", URIDecode, 387 "decodeURIComponent", URIDecodeComponent, 388 "encodeURI", URIEncode, 389 "encodeURIComponent", URIEncodeComponent 390 )); 391 392})(); 393