// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains support for URI manipulations written in
// JavaScript.

8(function(global, utils) {
9
10"use strict";
11
12%CheckIsBootstrapping();
13
14//- ------------------------------------------------------------------
15// Imports
16
17var GlobalObject = global.Object;
18var GlobalArray = global.Array;
19var InternalArray = utils.InternalArray;
20var MakeURIError;
21
22utils.Import(function(from) {
23  MakeURIError = from.MakeURIError;
24});
25
26
// -------------------------------------------------------------------
// Define internal helper functions.
29
// Maps the char code of a hexadecimal digit to its numeric value.
// |code| is a char code; the result is 0..15 for '0'-'9', 'A'-'F' and
// 'a'-'f', and -1 for every other char code.
function HexValueOf(code) {
  // 0-9
  if (code >= 48 && code <= 57) return code - 48;
  // A-F
  if (code >= 65 && code <= 70) return code - 55;
  // a-f
  if (code >= 97 && code <= 102) return code - 87;

  return -1;
}
40
// Does the char code correspond to an alpha-numeric char.
// Returns true exactly for the ASCII ranges 'a'-'z', 'A'-'Z' and '0'-'9'.
function isAlphaNumeric(cc) {
  // a - z
  if (97 <= cc && cc <= 122) return true;
  // A - Z
  if (65 <= cc && cc <= 90) return true;
  // 0 - 9
  if (48 <= cc && cc <= 57) return true;

  return false;
}
52
// Char codes of the upper-case hexadecimal digits '0'-'9', 'A'-'F', indexed
// by nibble value. Lazily initialized by URIEncodeOctets; 0 means "not yet".
var hexCharCodeArray = 0;

// Stores the three char codes of "%XY", the percent-encoding of |octet|, into
// |result| starting at |index| and returns the index just past them.
// hexCharCodeArray must already have been initialized.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  result[index++] = 37; // Char code of '%'.
  result[index++] = hexCharCodeArray[octet >> 4];
  result[index++] = hexCharCodeArray[octet & 0x0F];
  return index;
}

// Percent-encodes the UTF-8 sequence held in |octets| (one to four octets)
// into |result| starting at |index| and returns the index just past the
// output. octets[0] is always emitted; a later slot holding 0 is treated as
// unused, as it always was.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  // Never look past octets.length: an element the caller did not supply would
  // otherwise be resolved through the prototype chain, where script-defined
  // indexed properties could inject bogus octets into the output.
  var count = octets.length < 4 ? octets.length : 4;
  for (var i = 1; i < count; i++) {
    if (octets[i]) index = URIAddEncodedOctetToBuffer(octets[i], result, index);
  }
  return index;
}

// Percent-encodes the UTF-8 form of the single (non-surrogate) UTF-16 code
// unit |cc| into |result| at |index|; returns the index just past the output.
function URIEncodeSingle(cc, result, index) {
  // The octets are built as a dense array literal of exactly the right
  // length, so the buffer has no holes and every element is an own property.
  var octets;
  if (cc <= 0x007F) {
    octets = [cc];
  } else {
    var y = (cc >> 6) & 63;
    var z = cc & 63;
    if (cc <= 0x07FF) {
      octets = [y + 192, z + 128];
    } else {
      var x = (cc >> 12) & 0xF;
      octets = [x + 224, y + 128, z + 128];
    }
  }
  return URIEncodeOctets(octets, result, index);
}

// Percent-encodes the four-octet UTF-8 form of the code point represented by
// the surrogate pair |cc1| (lead) and |cc2| (trail) into |result| at |index|;
// returns the index just past the output. The caller validates the pair.
function URIEncodePair(cc1 , cc2, result, index) {
  // The +1 turns the four plane bits stored in the lead surrogate back into
  // the upper bits of the code point (i.e. adds the 0x10000 offset).
  var u = ((cc1 >> 6) & 0xF) + 1;
  var w = (cc1 >> 2) & 0xF;
  var x = cc1 & 3;
  var y = (cc2 >> 6) & 0xF;
  var z = cc2 & 63;
  var octets = [
    (u >> 2) + 240,
    (((u & 3) << 4) | w) + 128,
    ((x << 4) | y) + 128,
    z + 128
  ];
  return URIEncodeOctets(octets, result, index);
}
106
// Combines the char codes of two hexadecimal digits into the octet they
// spell: |highChar| supplies the upper nibble and |lowChar| the lower one.
// Throws the error built by MakeURIError() if either is not a hex digit.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highCode = HexValueOf(highChar);
  var lowCode = HexValueOf(lowChar);
  if (highCode == -1 || lowCode == -1) throw MakeURIError();
  return (highCode << 4) | lowCode;
}
113
114// Callers must ensure that |result| is a sufficiently long sequential
115// two-byte string!
116function URIDecodeOctets(octets, result, index) {
117  var value;
118  var o0 = octets[0];
119  if (o0 < 0x80) {
120    value = o0;
121  } else if (o0 < 0xc2) {
122    throw MakeURIError();
123  } else {
124    var o1 = octets[1];
125    if (o0 < 0xe0) {
126      var a = o0 & 0x1f;
127      if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
128      var b = o1 & 0x3f;
129      value = (a << 6) + b;
130      if (value < 0x80 || value > 0x7ff) throw MakeURIError();
131    } else {
132      var o2 = octets[2];
133      if (o0 < 0xf0) {
134        var a = o0 & 0x0f;
135        if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
136        var b = o1 & 0x3f;
137        if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError();
138        var c = o2 & 0x3f;
139        value = (a << 12) + (b << 6) + c;
140        if ((value < 0x800) || (value > 0xffff)) throw MakeURIError();
141      } else {
142        var o3 = octets[3];
143        if (o0 < 0xf8) {
144          var a = (o0 & 0x07);
145          if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
146          var b = (o1 & 0x3f);
147          if ((o2 < 0x80) || (o2 > 0xbf)) {
148            throw MakeURIError();
149          }
150          var c = (o2 & 0x3f);
151          if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError();
152          var d = (o3 & 0x3f);
153          value = (a << 18) + (b << 12) + (c << 6) + d;
154          if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError();
155        } else {
156          throw MakeURIError();
157        }
158      }
159    }
160  }
161  if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError();
162  if (value < 0x10000) {
163    %_TwoByteSeqStringSetChar(index++, value, result);
164  } else {
165    %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
166    %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
167  }
168  return index;
169}
170
171// ECMA-262, section 15.1.3
172function Encode(uri, unescape) {
173  uri = TO_STRING(uri);
174  var uriLength = uri.length;
175  var array = new InternalArray(uriLength);
176  var index = 0;
177  for (var k = 0; k < uriLength; k++) {
178    var cc1 = %_StringCharCodeAt(uri, k);
179    if (unescape(cc1)) {
180      array[index++] = cc1;
181    } else {
182      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
183      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
184        index = URIEncodeSingle(cc1, array, index);
185      } else {
186        k++;
187        if (k == uriLength) throw MakeURIError();
188        var cc2 = %_StringCharCodeAt(uri, k);
189        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
190        index = URIEncodePair(cc1, cc2, array, index);
191      }
192    }
193  }
194
195  var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
196  for (var i = 0; i < array.length; i++) {
197    %_OneByteSeqStringSetChar(i, array[i], result);
198  }
199  return result;
200}
201
202// ECMA-262, section 15.1.3
203function Decode(uri, reserved) {
204  uri = TO_STRING(uri);
205  var uriLength = uri.length;
206  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
207  var index = 0;
208  var k = 0;
209
210  // Optimistically assume one-byte string.
211  for ( ; k < uriLength; k++) {
212    var code = %_StringCharCodeAt(uri, k);
213    if (code == 37) {  // '%'
214      if (k + 2 >= uriLength) throw MakeURIError();
215      var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1),
216                                     %_StringCharCodeAt(uri, k+2));
217      if (cc >> 7) break;  // Assumption wrong, two-byte string.
218      if (reserved(cc)) {
219        %_OneByteSeqStringSetChar(index++, 37, one_byte);  // '%'.
220        %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1),
221                                  one_byte);
222        %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2),
223                                  one_byte);
224      } else {
225        %_OneByteSeqStringSetChar(index++, cc, one_byte);
226      }
227      k += 2;
228    } else {
229      if (code > 0x7f) break;  // Assumption wrong, two-byte string.
230      %_OneByteSeqStringSetChar(index++, code, one_byte);
231    }
232  }
233
234  one_byte = %TruncateString(one_byte, index);
235  if (k == uriLength) return one_byte;
236
237  // Write into two byte string.
238  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
239  index = 0;
240
241  for ( ; k < uriLength; k++) {
242    var code = %_StringCharCodeAt(uri, k);
243    if (code == 37) {  // '%'
244      if (k + 2 >= uriLength) throw MakeURIError();
245      var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
246                                     %_StringCharCodeAt(uri, ++k));
247      if (cc >> 7) {
248        var n = 0;
249        while (((cc << ++n) & 0x80) != 0) { }
250        if (n == 1 || n > 4) throw MakeURIError();
251        var octets = new GlobalArray(n);
252        octets[0] = cc;
253        if (k + 3 * (n - 1) >= uriLength) throw MakeURIError();
254        for (var i = 1; i < n; i++) {
255          if (uri[++k] != '%') throw MakeURIError();
256          octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
257                                            %_StringCharCodeAt(uri, ++k));
258        }
259        index = URIDecodeOctets(octets, two_byte, index);
260      } else  if (reserved(cc)) {
261        %_TwoByteSeqStringSetChar(index++, 37, two_byte);  // '%'.
262        %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1),
263                                  two_byte);
264        %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k),
265                                  two_byte);
266      } else {
267        %_TwoByteSeqStringSetChar(index++, cc, two_byte);
268      }
269    } else {
270      %_TwoByteSeqStringSetChar(index++, code, two_byte);
271    }
272  }
273
274  two_byte = %TruncateString(two_byte, index);
275  return one_byte + two_byte;
276}
277
// -------------------------------------------------------------------
// Define exported functions.
280
281// ECMA-262 - B.2.1.
282function URIEscapeJS(s) {
283  return %URIEscape(s);
284}
285
286// ECMA-262 - B.2.2.
287function URIUnescapeJS(s) {
288  return %URIUnescape(s);
289}
290
// ECMA-262 - 15.1.3.1.
// Installed below as the global decodeURI(). Decodes |uri| with Decode(),
// leaving untouched every escape that stands for one of the characters
// # $ & + , / : ; = ? @
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &
    if (cc == 38) return true;
    // +,
    if (43 <= cc && cc <= 44) return true;
    // /
    if (cc == 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;

    return false;
  };
  return Decode(uri, reservedPredicate);
}
313
// ECMA-262 - 15.1.3.2.
// Installed below as the global decodeURIComponent(). Decodes |component|
// with Decode(); no escape is preserved, because the predicate treats no
// char code as reserved.
function URIDecodeComponent(component) {
  var reservedPredicate = function(cc) { return false; };
  return Decode(component, reservedPredicate);
}
319
// ECMA-262 - 15.1.3.3.
// Installed below as the global encodeURI(). Encodes |uri| with Encode(),
// copying through unchanged the ASCII letters and digits and the characters
// ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &'()*+,-./
    if (38 <= cc && cc <= 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };
  return Encode(uri, unescapePredicate);
}
345
// ECMA-262 - 15.1.3.4
// Installed below as the global encodeURIComponent(). Encodes |component|
// with Encode(), copying through unchanged only the ASCII letters and digits
// and the characters ! ' ( ) * - . _ ~
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // '()*
    if (39 <= cc && cc <= 42) return true;
    // -.
    if (45 <= cc && cc <= 46) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };
  return Encode(component, unescapePredicate);
}
365
// -------------------------------------------------------------------
// Install exported functions.
368
// Set up non-enumerable URI functions on the global object and set
// their names. The list alternates the property name with the function
// installed under it.
utils.InstallFunctions(global, DONT_ENUM, [
  "escape", URIEscapeJS,
  "unescape", URIUnescapeJS,
  "decodeURI", URIDecode,
  "decodeURIComponent", URIDecodeComponent,
  "encodeURI", URIEncode,
  "encodeURIComponent", URIEncodeComponent
]);
379
380})
381