• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file relies on the fact that the following declaration has been made
6// in runtime.js:
7// var $Object = global.Object;
8// var $Array = global.Array;
9
// Local alias for the global RegExp constructor.  The functions below install
// their behavior on it and use it to construct new RegExp objects.
var $RegExp = global.RegExp;
11
12// -------------------------------------------------------------------
13
14// A recursive descent parser for Patterns according to the grammar of
15// ECMA-262 15.10.1, with deviations noted below.
16function DoConstructRegExp(object, pattern, flags) {
17  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
18  if (IS_REGEXP(pattern)) {
19    if (!IS_UNDEFINED(flags)) {
20      throw MakeTypeError('regexp_flags', []);
21    }
22    flags = (pattern.global ? 'g' : '')
23        + (pattern.ignoreCase ? 'i' : '')
24        + (pattern.multiline ? 'm' : '');
25    pattern = pattern.source;
26  }
27
28  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
29  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
30
31  var global = false;
32  var ignoreCase = false;
33  var multiline = false;
34  for (var i = 0; i < flags.length; i++) {
35    var c = %_CallFunction(flags, i, StringCharAt);
36    switch (c) {
37      case 'g':
38        if (global) {
39          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
40        }
41        global = true;
42        break;
43      case 'i':
44        if (ignoreCase) {
45          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
46        }
47        ignoreCase = true;
48        break;
49      case 'm':
50        if (multiline) {
51          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
52        }
53        multiline = true;
54        break;
55      default:
56        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
57    }
58  }
59
60  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
61
62  // Call internal function to compile the pattern.
63  %RegExpCompile(object, pattern, flags);
64}
65
66
67function RegExpConstructor(pattern, flags) {
68  if (%_IsConstructCall()) {
69    DoConstructRegExp(this, pattern, flags);
70  } else {
71    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
72    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
73      return pattern;
74    }
75    return new $RegExp(pattern, flags);
76  }
77}
78
79// Deprecated RegExp.prototype.compile method.  We behave like the constructor
80// were called again.  In SpiderMonkey, this method returns the regexp object.
81// In JSC, it returns undefined.  For compatibility with JSC, we match their
82// behavior.
83function RegExpCompileJS(pattern, flags) {
84  // Both JSC and SpiderMonkey treat a missing pattern argument as the
85  // empty subject string, and an actual undefined value passed as the
86  // pattern as the string 'undefined'.  Note that JSC is inconsistent
87  // here, treating undefined values differently in
88  // RegExp.prototype.compile and in the constructor, where they are
89  // the empty string.  For compatibility with JSC, we match their
90  // behavior.
91  if (this == $RegExp.prototype) {
92    // We don't allow recompiling RegExp.prototype.
93    throw MakeTypeError('incompatible_method_receiver',
94                        ['RegExp.prototype.compile', this]);
95  }
96  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
97    DoConstructRegExp(this, 'undefined', flags);
98  } else {
99    DoConstructRegExp(this, pattern, flags);
100  }
101}
102
103
104function DoRegExpExec(regexp, string, index) {
105  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
106  if (result !== null) lastMatchInfoOverride = null;
107  return result;
108}
109
110
// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
//
// Builds the result array for a successful match and returns it from the
// function in which the macro is used.  The first argument is the match info
// array holding the capture indices, the second is the subject string.
// Element 0 of the result is the whole match and element i is the substring
// of capture i; elements of captures whose start index is -1 are not
// assigned here.  The variables declared below end up in the scope of the
// function that uses the macro, so its arguments must not be named like any
// of them.
macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
  // Capture indices are stored as start/end pairs, so halve the count.
  var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
  var start = MATCHINFO[CAPTURE0];
  var end = MATCHINFO[CAPTURE1];
  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
  var first = %_SubString(STRING, start, end);
  var result = %_RegExpConstructResult(numResults, start, STRING);
  result[0] = first;
  // Only the whole match was recorded: nothing else to copy.
  if (numResults == 1) return result;
  // j walks the start/end index pairs of the remaining captures.
  var j = REGEXP_FIRST_CAPTURE + 2;
  for (var i = 1; i < numResults; i++) {
    start = MATCHINFO[j++];
    // A start index of -1 marks a capture without a recorded substring.
    if (start != -1) {
      end = MATCHINFO[j];
      result[i] = %_SubString(STRING, start, end);
    }
    // Step over the end index, whether or not it was read.
    j++;
  }
  return result;
endmacro
135
136
137function RegExpExecNoTests(regexp, string, start) {
138  // Must be called with RegExp, string and positive integer as arguments.
139  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
140  if (matchInfo !== null) {
141    lastMatchInfoOverride = null;
142    RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
143  }
144  regexp.lastIndex = 0;
145  return null;
146}
147
148
149function RegExpExec(string) {
150  if (!IS_REGEXP(this)) {
151    throw MakeTypeError('incompatible_method_receiver',
152                        ['RegExp.prototype.exec', this]);
153  }
154
155  string = TO_STRING_INLINE(string);
156  var lastIndex = this.lastIndex;
157
158  // Conversion is required by the ES5 specification (RegExp.prototype.exec
159  // algorithm, step 5) even if the value is discarded for non-global RegExps.
160  var i = TO_INTEGER(lastIndex);
161
162  var global = this.global;
163  if (global) {
164    if (i < 0 || i > string.length) {
165      this.lastIndex = 0;
166      return null;
167    }
168  } else {
169    i = 0;
170  }
171
172  // matchIndices is either null or the lastMatchInfo array.
173  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
174
175  if (IS_NULL(matchIndices)) {
176    this.lastIndex = 0;
177    return null;
178  }
179
180  // Successful match.
181  lastMatchInfoOverride = null;
182  if (global) {
183    this.lastIndex = lastMatchInfo[CAPTURE1];
184  }
185  RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
186}
187
188
// One-element cache for the simplified test regexp.  regexp_key is the RegExp
// object most recently passed to TrimRegExp and regexp_val is the trimmed
// RegExp that was built for it.
var regexp_key;
var regexp_val;
192
193// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
194// that test is defined in terms of String.prototype.exec. However, it probably
195// means the original value of String.prototype.exec, which is what everybody
196// else implements.
197function RegExpTest(string) {
198  if (!IS_REGEXP(this)) {
199    throw MakeTypeError('incompatible_method_receiver',
200                        ['RegExp.prototype.test', this]);
201  }
202  string = TO_STRING_INLINE(string);
203
204  var lastIndex = this.lastIndex;
205
206  // Conversion is required by the ES5 specification (RegExp.prototype.exec
207  // algorithm, step 5) even if the value is discarded for non-global RegExps.
208  var i = TO_INTEGER(lastIndex);
209
210  if (this.global) {
211    if (i < 0 || i > string.length) {
212      this.lastIndex = 0;
213      return false;
214    }
215    // matchIndices is either null or the lastMatchInfo array.
216    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
217    if (IS_NULL(matchIndices)) {
218      this.lastIndex = 0;
219      return false;
220    }
221    lastMatchInfoOverride = null;
222    this.lastIndex = lastMatchInfo[CAPTURE1];
223    return true;
224  } else {
225    // Non-global regexp.
226    // Remove irrelevant preceeding '.*' in a non-global test regexp.
227    // The expression checks whether this.source starts with '.*' and
228    // that the third char is not a '?'.
229    var regexp = this;
230    if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
231        %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
232        %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
233      regexp = TrimRegExp(regexp);
234    }
235    // matchIndices is either null or the lastMatchInfo array.
236    var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
237    if (IS_NULL(matchIndices)) {
238      this.lastIndex = 0;
239      return false;
240    }
241    lastMatchInfoOverride = null;
242    return true;
243  }
244}
245
246function TrimRegExp(regexp) {
247  if (!%_ObjectEquals(regexp_key, regexp)) {
248    regexp_key = regexp;
249    regexp_val =
250      new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
251                  (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
252                                     : regexp.multiline ? "m" : ""));
253  }
254  return regexp_val;
255}
256
257
// RegExp.prototype.toString.  Returns the source wrapped in slashes followed
// by the flags in the order 'g', 'i', 'm'.  Throws a TypeError if the
// receiver is not a RegExp.
function RegExpToString() {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError('incompatible_method_receiver',
                        ['RegExp.prototype.toString', this]);
  }
  var source = this.source;
  var flags = (this.global ? 'g' : '')
      + (this.ignoreCase ? 'i' : '')
      + (this.multiline ? 'm' : '');
  return '/' + source + '/' + flags;
}
269
270
271// Getters for the static properties lastMatch, lastParen, leftContext, and
272// rightContext of the RegExp constructor.  The properties are computed based
273// on the captures array of the last successful match and the subject string
274// of the last successful match.
275function RegExpGetLastMatch() {
276  if (lastMatchInfoOverride !== null) {
277    return OVERRIDE_MATCH(lastMatchInfoOverride);
278  }
279  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
280  return %_SubString(regExpSubject,
281                     lastMatchInfo[CAPTURE0],
282                     lastMatchInfo[CAPTURE1]);
283}
284
285
286function RegExpGetLastParen() {
287  if (lastMatchInfoOverride) {
288    var override = lastMatchInfoOverride;
289    if (override.length <= 3) return '';
290    return override[override.length - 3];
291  }
292  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
293  if (length <= 2) return '';  // There were no captures.
294  // We match the SpiderMonkey behavior: return the substring defined by the
295  // last pair (after the first pair) of elements of the capture array even if
296  // it is empty.
297  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
298  var start = lastMatchInfo[CAPTURE(length - 2)];
299  var end = lastMatchInfo[CAPTURE(length - 1)];
300  if (start != -1 && end != -1) {
301    return %_SubString(regExpSubject, start, end);
302  }
303  return "";
304}
305
306
307function RegExpGetLeftContext() {
308  var start_index;
309  var subject;
310  if (!lastMatchInfoOverride) {
311    start_index = lastMatchInfo[CAPTURE0];
312    subject = LAST_SUBJECT(lastMatchInfo);
313  } else {
314    var override = lastMatchInfoOverride;
315    start_index = OVERRIDE_POS(override);
316    subject = OVERRIDE_SUBJECT(override);
317  }
318  return %_SubString(subject, 0, start_index);
319}
320
321
322function RegExpGetRightContext() {
323  var start_index;
324  var subject;
325  if (!lastMatchInfoOverride) {
326    start_index = lastMatchInfo[CAPTURE1];
327    subject = LAST_SUBJECT(lastMatchInfo);
328  } else {
329    var override = lastMatchInfoOverride;
330    subject = OVERRIDE_SUBJECT(override);
331    var match = OVERRIDE_MATCH(override);
332    start_index = OVERRIDE_POS(override) + match.length;
333  }
334  return %_SubString(subject, start_index, subject.length);
335}
336
337
338// The properties $1..$9 are the first nine capturing substrings of the last
339// successful match, or ''.  The function RegExpMakeCaptureGetter will be
340// called with indices from 1 to 9.
341function RegExpMakeCaptureGetter(n) {
342  return function() {
343    if (lastMatchInfoOverride) {
344      if (n < lastMatchInfoOverride.length - 2) {
345        return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
346      }
347      return '';
348    }
349    var index = n * 2;
350    if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
351    var matchStart = lastMatchInfo[CAPTURE(index)];
352    var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
353    if (matchStart == -1 || matchEnd == -1) return '';
354    return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
355  };
356}
357
358
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
//
// The initial contents describe an empty match at index 0 of the empty
// string, with no input recorded.
var lastMatchInfo = new InternalPackedArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    UNDEFINED,         // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);
372
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.  The value is null while no override is active; every successful
// match performed by the functions in this file resets it to null.
var lastMatchInfoOverride = null;
378
379// -------------------------------------------------------------------
380
381function SetUpRegExp() {
382  %CheckIsBootstrapping();
383  %FunctionSetInstanceClassName($RegExp, 'RegExp');
384  %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
385  %SetCode($RegExp, RegExpConstructor);
386
387  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
388    "exec", RegExpExec,
389    "test", RegExpTest,
390    "toString", RegExpToString,
391    "compile", RegExpCompileJS
392  ));
393
394  // The length of compile is 1 in SpiderMonkey.
395  %FunctionSetLength($RegExp.prototype.compile, 1);
396
397  // The properties input, $input, and $_ are aliases for each other.  When this
398  // value is set the value it is set to is coerced to a string.
399  // Getter and setter for the input.
400  var RegExpGetInput = function() {
401    var regExpInput = LAST_INPUT(lastMatchInfo);
402    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
403  };
404  var RegExpSetInput = function(string) {
405    LAST_INPUT(lastMatchInfo) = ToString(string);
406  };
407
408  %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
409  %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
410                                    RegExpSetInput, DONT_DELETE);
411  %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
412                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
413  %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
414                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
415
416  // The properties multiline and $* are aliases for each other.  When this
417  // value is set in SpiderMonkey, the value it is set to is coerced to a
418  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
419  // the value of the expression 'RegExp.multiline = null' (for instance) is the
420  // boolean false (i.e., the value after coercion), while in V8 it is the value
421  // null (i.e., the value before coercion).
422
423  // Getter and setter for multiline.
424  var multiline = false;
425  var RegExpGetMultiline = function() { return multiline; };
426  var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
427
428  %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
429                                    RegExpSetMultiline, DONT_DELETE);
430  %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
431                                    RegExpSetMultiline,
432                                    DONT_ENUM | DONT_DELETE);
433
434
435  var NoOpSetter = function(ignored) {};
436
437
438  // Static properties set by a successful match.
439  %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
440                                    NoOpSetter, DONT_DELETE);
441  %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
442                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
443  %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
444                                    NoOpSetter, DONT_DELETE);
445  %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
446                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
447  %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
448                                    RegExpGetLeftContext, NoOpSetter,
449                                    DONT_DELETE);
450  %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
451                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
452  %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
453                                    RegExpGetRightContext, NoOpSetter,
454                                    DONT_DELETE);
455  %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
456                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
457
458  for (var i = 1; i < 10; ++i) {
459    %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
460                                      RegExpMakeCaptureGetter(i), NoOpSetter,
461                                      DONT_DELETE);
462  }
463  %ToFastProperties($RegExp);
464}
465
// Perform the set-up once, when this file is evaluated.
SetUpRegExp();
467