• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2006-2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// Expect $Object = global.Object;
29// Expect $Array = global.Array;
30
31const $RegExp = global.RegExp;
32
33// A recursive descent parser for Patterns according to the grammar of
34// ECMA-262 15.10.1, with deviations noted below.
35function DoConstructRegExp(object, pattern, flags) {
36  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
37  if (IS_REGEXP(pattern)) {
38    if (!IS_UNDEFINED(flags)) {
39      throw MakeTypeError('regexp_flags', []);
40    }
41    flags = (pattern.global ? 'g' : '')
42        + (pattern.ignoreCase ? 'i' : '')
43        + (pattern.multiline ? 'm' : '');
44    pattern = pattern.source;
45  }
46
47  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
48  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
49
50  var global = false;
51  var ignoreCase = false;
52  var multiline = false;
53
54  for (var i = 0; i < flags.length; i++) {
55    var c = %_CallFunction(flags, i, StringCharAt);
56    switch (c) {
57      case 'g':
58        // Allow duplicate flags to be consistent with JSC and others.
59        global = true;
60        break;
61      case 'i':
62        ignoreCase = true;
63        break;
64      case 'm':
65        multiline = true;
66        break;
67      default:
68        // Ignore flags that have no meaning to be consistent with
69        // JSC.
70        break;
71    }
72  }
73
74  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
75
76  // Call internal function to compile the pattern.
77  %RegExpCompile(object, pattern, flags);
78}
79
80
81function RegExpConstructor(pattern, flags) {
82  if (%_IsConstructCall()) {
83    DoConstructRegExp(this, pattern, flags);
84  } else {
85    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
86    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
87      return pattern;
88    }
89    return new $RegExp(pattern, flags);
90  }
91}
92
93
94// Deprecated RegExp.prototype.compile method.  We behave like the constructor
95// were called again.  In SpiderMonkey, this method returns the regexp object.
96// In JSC, it returns undefined.  For compatibility with JSC, we match their
97// behavior.
98function CompileRegExp(pattern, flags) {
99  // Both JSC and SpiderMonkey treat a missing pattern argument as the
100  // empty subject string, and an actual undefined value passed as the
101  // pattern as the string 'undefined'.  Note that JSC is inconsistent
102  // here, treating undefined values differently in
103  // RegExp.prototype.compile and in the constructor, where they are
104  // the empty string.  For compatibility with JSC, we match their
105  // behavior.
106  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
107    DoConstructRegExp(this, 'undefined', flags);
108  } else {
109    DoConstructRegExp(this, pattern, flags);
110  }
111}
112
113
114function DoRegExpExec(regexp, string, index) {
115  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
116  if (result !== null) lastMatchInfoOverride = null;
117  return result;
118}
119
120
121function BuildResultFromMatchInfo(lastMatchInfo, s) {
122  var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
123  var start = lastMatchInfo[CAPTURE0];
124  var end = lastMatchInfo[CAPTURE1];
125  var result = %_RegExpConstructResult(numResults, start, s);
126  if (start + 1 == end) {
127    result[0] = %_StringCharAt(s, start);
128  } else {
129    result[0] = %_SubString(s, start, end);
130  }
131  var j = REGEXP_FIRST_CAPTURE + 2;
132  for (var i = 1; i < numResults; i++) {
133    start = lastMatchInfo[j++];
134    end = lastMatchInfo[j++];
135    if (end != -1) {
136      if (start + 1 == end) {
137        result[i] = %_StringCharAt(s, start);
138      } else {
139        result[i] = %_SubString(s, start, end);
140      }
141    } else {
142      // Make sure the element is present. Avoid reading the undefined
143      // property from the global object since this may change.
144      result[i] = void 0;
145    }
146  }
147  return result;
148}
149
150
151function RegExpExecNoTests(regexp, string, start) {
152  // Must be called with RegExp, string and positive integer as arguments.
153  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
154  if (matchInfo !== null) {
155    lastMatchInfoOverride = null;
156    return BuildResultFromMatchInfo(matchInfo, string);
157  }
158  return null;
159}
160
161
162function RegExpExec(string) {
163  if (!IS_REGEXP(this)) {
164    throw MakeTypeError('incompatible_method_receiver',
165                        ['RegExp.prototype.exec', this]);
166  }
167
168  if (%_ArgumentsLength() === 0) {
169    var regExpInput = LAST_INPUT(lastMatchInfo);
170    if (IS_UNDEFINED(regExpInput)) {
171      throw MakeError('no_input_to_regexp', [this]);
172    }
173    string = regExpInput;
174  }
175  string = TO_STRING_INLINE(string);
176  var lastIndex = this.lastIndex;
177
178  // Conversion is required by the ES5 specification (RegExp.prototype.exec
179  // algorithm, step 5) even if the value is discarded for non-global RegExps.
180  var i = TO_INTEGER(lastIndex);
181
182  var global = this.global;
183  if (global) {
184    if (i < 0 || i > string.length) {
185      this.lastIndex = 0;
186      return null;
187    }
188  } else {
189    i = 0;
190  }
191
192  %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
193  // matchIndices is either null or the lastMatchInfo array.
194  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
195
196  if (matchIndices === null) {
197    if (global) this.lastIndex = 0;
198    return null;
199  }
200
201  // Successful match.
202  lastMatchInfoOverride = null;
203  if (global) {
204    this.lastIndex = lastMatchInfo[CAPTURE1];
205  }
206  return BuildResultFromMatchInfo(matchIndices, string);
207}
208
209
210// One-element cache for the simplified test regexp.
211var regexp_key;
212var regexp_val;
213
214// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
215// that test is defined in terms of String.prototype.exec. However, it probably
216// means the original value of String.prototype.exec, which is what everybody
217// else implements.
218function RegExpTest(string) {
219  if (!IS_REGEXP(this)) {
220    throw MakeTypeError('incompatible_method_receiver',
221                        ['RegExp.prototype.test', this]);
222  }
223  if (%_ArgumentsLength() == 0) {
224    var regExpInput = LAST_INPUT(lastMatchInfo);
225    if (IS_UNDEFINED(regExpInput)) {
226      throw MakeError('no_input_to_regexp', [this]);
227    }
228    string = regExpInput;
229  }
230
231  string = TO_STRING_INLINE(string);
232
233  var lastIndex = this.lastIndex;
234
235  // Conversion is required by the ES5 specification (RegExp.prototype.exec
236  // algorithm, step 5) even if the value is discarded for non-global RegExps.
237  var i = TO_INTEGER(lastIndex);
238
239  if (this.global) {
240    if (i < 0 || i > string.length) {
241      this.lastIndex = 0;
242      return false;
243    }
244    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
245    // matchIndices is either null or the lastMatchInfo array.
246    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
247    if (matchIndices === null) {
248      this.lastIndex = 0;
249      return false;
250    }
251    lastMatchInfoOverride = null;
252    this.lastIndex = lastMatchInfo[CAPTURE1];
253    return true;
254  } else {
255    // Non-global regexp.
256    // Remove irrelevant preceeding '.*' in a non-global test regexp.
257    // The expression checks whether this.source starts with '.*' and
258    // that the third char is not a '?'.
259    if (%_StringCharCodeAt(this.source, 0) == 46 &&  // '.'
260        %_StringCharCodeAt(this.source, 1) == 42 &&  // '*'
261        %_StringCharCodeAt(this.source, 2) != 63) {  // '?'
262      if (!%_ObjectEquals(regexp_key, this)) {
263        regexp_key = this;
264        regexp_val = new $RegExp(SubString(this.source, 2, this.source.length),
265                                 (!this.ignoreCase
266                                  ? !this.multiline ? "" : "m"
267                                  : !this.multiline ? "i" : "im"));
268      }
269      if (%_RegExpExec(regexp_val, string, 0, lastMatchInfo) === null) {
270        return false;
271      }
272    }
273    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
274    // matchIndices is either null or the lastMatchInfo array.
275    var matchIndices = %_RegExpExec(this, string, 0, lastMatchInfo);
276    if (matchIndices === null) return false;
277    lastMatchInfoOverride = null;
278    return true;
279  }
280}
281
282
// RegExp.prototype.toString: returns '/' + source + '/' followed by the
// letters of the flags that are set, in the order g, i, m.
function RegExpToString() {
  // If this.source is an empty string, output /(?:)/.
  // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
  // ecma_2/RegExp/properties-001.js.
  var pattern = this.source;
  if (!pattern) pattern = '(?:)';
  var text = '/' + pattern + '/';
  text += this.global ? 'g' : '';
  text += this.ignoreCase ? 'i' : '';
  text += this.multiline ? 'm' : '';
  return text;
}
294
295
296// Getters for the static properties lastMatch, lastParen, leftContext, and
297// rightContext of the RegExp constructor.  The properties are computed based
298// on the captures array of the last successful match and the subject string
299// of the last successful match.
// Getter for RegExp.lastMatch / RegExp['$&']: the substring matched by the
// last successful match.
function RegExpGetLastMatch() {
  if (lastMatchInfoOverride === null) {
    return SubString(LAST_SUBJECT(lastMatchInfo),
                     lastMatchInfo[CAPTURE0],
                     lastMatchInfo[CAPTURE1]);
  }
  // With an override in place, element 0 is the matched substring.
  return lastMatchInfoOverride[0];
}
309
310
// Getter for RegExp.lastParen / RegExp['$+']: the last capture of the last
// successful match, or '' when there were no captures.
function RegExpGetLastParen() {
  var override = lastMatchInfoOverride;
  if (override) {
    // The last capture sits just before the two trailing elements; with
    // three or fewer elements there are no captures.
    return (override.length <= 3) ? '' : override[override.length - 3];
  }
  var captureSlots = NUMBER_OF_CAPTURES(lastMatchInfo);
  if (captureSlots <= 2) return '';  // There were no captures.
  // We match the SpiderMonkey behavior: return the substring defined by the
  // last pair (after the first pair) of elements of the capture array even if
  // it is empty.
  var from = lastMatchInfo[CAPTURE(captureSlots - 2)];
  var to = lastMatchInfo[CAPTURE(captureSlots - 1)];
  if (from == -1 || to == -1) return "";
  return SubString(LAST_SUBJECT(lastMatchInfo), from, to);
}
330
331
// Getter for RegExp.leftContext / RegExp['$`']: the part of the subject
// that precedes the last successful match.
function RegExpGetLeftContext() {
  var override = lastMatchInfoOverride;
  if (override) {
    // The override ends with the match position followed by the subject.
    var count = override.length;
    return SubString(override[count - 1], 0, override[count - 2]);
  }
  return SubString(LAST_SUBJECT(lastMatchInfo), 0, lastMatchInfo[CAPTURE0]);
}
345
346
// Getter for RegExp.rightContext / RegExp["$'"]: the part of the subject
// that follows the last successful match.
function RegExpGetRightContext() {
  var start_index;
  var subject;
  if (!lastMatchInfoOverride) {
    start_index = lastMatchInfo[CAPTURE1];
    subject = LAST_SUBJECT(lastMatchInfo);
  } else {
    // The override is laid out like the arguments of a replacement function:
    // element 0 is the matched substring, the second-to-last element is the
    // match position and the last element is the subject.
    var override = lastMatchInfoOverride;
    subject = override[override.length - 1];
    var match = override[0];
    // The right context starts where the match ends, i.e. at the match
    // position plus the length of the matched substring (not the length of
    // the whole subject, which would always yield an empty result).
    start_index = override[override.length - 2] + match.length;
  }
  return SubString(subject, start_index, subject.length);
}
360
361
362// The properties $1..$9 are the first nine capturing substrings of the last
363// successful match, or ''.  The function RegExpMakeCaptureGetter will be
364// called with indices from 1 to 9.
// Returns the getter for the static property RegExp.$n: the n-th capture of
// the last successful match, or '' if there is no such capture or it did
// not take part in the match.
function RegExpMakeCaptureGetter(n) {
  // Position of the capture's start index within the capture section.
  var slot = n * 2;
  return function() {
    var override = lastMatchInfoOverride;
    if (override) {
      // Captures occupy the elements between the match (element 0) and the
      // two trailing elements.
      return (n < override.length - 2) ? override[n] : '';
    }
    if (slot >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    var from = lastMatchInfo[CAPTURE(slot)];
    var to = lastMatchInfo[CAPTURE(slot + 1)];
    if (from != -1 && to != -1) {
      return SubString(LAST_SUBJECT(lastMatchInfo), from, to);
    }
    return '';
  };
}
379
380
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
var lastMatchInfo = new InternalArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    void 0,            // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);

// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.  The static getters in this file read it as follows: element 0
// is the matched substring, the second-to-last element is the match
// position and the last element is the subject string.  Every successful
// match performed in this file resets the override to null.
var lastMatchInfoOverride = null;
400
401// -------------------------------------------------------------------
402
403function SetupRegExp() {
404  %FunctionSetInstanceClassName($RegExp, 'RegExp');
405  %FunctionSetPrototype($RegExp, new $Object());
406  %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
407  %SetCode($RegExp, RegExpConstructor);
408
409  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
410    "exec", RegExpExec,
411    "test", RegExpTest,
412    "toString", RegExpToString,
413    "compile", CompileRegExp
414  ));
415
416  // The length of compile is 1 in SpiderMonkey.
417  %FunctionSetLength($RegExp.prototype.compile, 1);
418
419  // The properties input, $input, and $_ are aliases for each other.  When this
420  // value is set the value it is set to is coerced to a string.
421  // Getter and setter for the input.
422  function RegExpGetInput() {
423    var regExpInput = LAST_INPUT(lastMatchInfo);
424    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
425  }
426  function RegExpSetInput(string) {
427    LAST_INPUT(lastMatchInfo) = ToString(string);
428  };
429
430  %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE);
431  %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE);
432  %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
433  %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
434  %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
435  %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
436
437  // The properties multiline and $* are aliases for each other.  When this
438  // value is set in SpiderMonkey, the value it is set to is coerced to a
439  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
440  // the value of the expression 'RegExp.multiline = null' (for instance) is the
441  // boolean false (ie, the value after coercion), while in V8 it is the value
442  // null (ie, the value before coercion).
443
444  // Getter and setter for multiline.
445  var multiline = false;
446  function RegExpGetMultiline() { return multiline; };
447  function RegExpSetMultiline(flag) { multiline = flag ? true : false; };
448
449  %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE);
450  %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE);
451  %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE);
452  %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE);
453
454
455  function NoOpSetter(ignored) {}
456
457
458  // Static properties set by a successful match.
459  %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE);
460  %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE);
461  %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE);
462  %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
463  %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE);
464  %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE);
465  %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE);
466  %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
467  %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE);
468  %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE);
469  %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE);
470  %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
471  %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE);
472  %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE);
473  %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE);
474  %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
475
476  for (var i = 1; i < 10; ++i) {
477    %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE);
478    %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE);
479  }
480}
481
482
483SetupRegExp();
484