1// Copyright 2006-2009 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28// Expect $Object = global.Object; 29// Expect $Array = global.Array; 30 31const $RegExp = global.RegExp; 32 33// A recursive descent parser for Patterns according to the grammar of 34// ECMA-262 15.10.1, with deviations noted below. 35function DoConstructRegExp(object, pattern, flags) { 36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 37 if (IS_REGEXP(pattern)) { 38 if (!IS_UNDEFINED(flags)) { 39 throw MakeTypeError('regexp_flags', []); 40 } 41 flags = (pattern.global ? 'g' : '') 42 + (pattern.ignoreCase ? 'i' : '') 43 + (pattern.multiline ? 'm' : ''); 44 pattern = pattern.source; 45 } 46 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 49 50 var global = false; 51 var ignoreCase = false; 52 var multiline = false; 53 54 for (var i = 0; i < flags.length; i++) { 55 var c = %_CallFunction(flags, i, StringCharAt); 56 switch (c) { 57 case 'g': 58 // Allow duplicate flags to be consistent with JSC and others. 59 global = true; 60 break; 61 case 'i': 62 ignoreCase = true; 63 break; 64 case 'm': 65 multiline = true; 66 break; 67 default: 68 // Ignore flags that have no meaning to be consistent with 69 // JSC. 70 break; 71 } 72 } 73 74 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline); 75 76 // Call internal function to compile the pattern. 77 %RegExpCompile(object, pattern, flags); 78} 79 80 81function RegExpConstructor(pattern, flags) { 82 if (%_IsConstructCall()) { 83 DoConstructRegExp(this, pattern, flags); 84 } else { 85 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 86 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 87 return pattern; 88 } 89 return new $RegExp(pattern, flags); 90 } 91} 92 93 94// Deprecated RegExp.prototype.compile method. We behave like the constructor 95// were called again. In SpiderMonkey, this method returns the regexp object. 96// In JSC, it returns undefined. For compatibility with JSC, we match their 97// behavior. 98function CompileRegExp(pattern, flags) { 99 // Both JSC and SpiderMonkey treat a missing pattern argument as the 100 // empty subject string, and an actual undefined value passed as the 101 // pattern as the string 'undefined'. Note that JSC is inconsistent 102 // here, treating undefined values differently in 103 // RegExp.prototype.compile and in the constructor, where they are 104 // the empty string. For compatibility with JSC, we match their 105 // behavior. 106 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 107 DoConstructRegExp(this, 'undefined', flags); 108 } else { 109 DoConstructRegExp(this, pattern, flags); 110 } 111} 112 113 114function DoRegExpExec(regexp, string, index) { 115 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 116 if (result !== null) lastMatchInfoOverride = null; 117 return result; 118} 119 120 121function BuildResultFromMatchInfo(lastMatchInfo, s) { 122 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 123 var start = lastMatchInfo[CAPTURE0]; 124 var end = lastMatchInfo[CAPTURE1]; 125 var result = %_RegExpConstructResult(numResults, start, s); 126 if (start + 1 == end) { 127 result[0] = %_StringCharAt(s, start); 128 } else { 129 result[0] = %_SubString(s, start, end); 130 } 131 var j = REGEXP_FIRST_CAPTURE + 2; 132 for (var i = 1; i < numResults; i++) { 133 start = lastMatchInfo[j++]; 134 end = lastMatchInfo[j++]; 135 if (end != -1) { 136 if (start + 1 == end) { 137 result[i] = %_StringCharAt(s, start); 138 } else { 139 result[i] = %_SubString(s, start, end); 140 } 141 } else { 142 // Make sure the element is present. Avoid reading the undefined 143 // property from the global object since this may change. 144 result[i] = void 0; 145 } 146 } 147 return result; 148} 149 150 151function RegExpExecNoTests(regexp, string, start) { 152 // Must be called with RegExp, string and positive integer as arguments. 153 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 154 if (matchInfo !== null) { 155 lastMatchInfoOverride = null; 156 return BuildResultFromMatchInfo(matchInfo, string); 157 } 158 return null; 159} 160 161 162function RegExpExec(string) { 163 if (!IS_REGEXP(this)) { 164 throw MakeTypeError('incompatible_method_receiver', 165 ['RegExp.prototype.exec', this]); 166 } 167 168 if (%_ArgumentsLength() === 0) { 169 var regExpInput = LAST_INPUT(lastMatchInfo); 170 if (IS_UNDEFINED(regExpInput)) { 171 throw MakeError('no_input_to_regexp', [this]); 172 } 173 string = regExpInput; 174 } 175 string = TO_STRING_INLINE(string); 176 var lastIndex = this.lastIndex; 177 178 // Conversion is required by the ES5 specification (RegExp.prototype.exec 179 // algorithm, step 5) even if the value is discarded for non-global RegExps. 180 var i = TO_INTEGER(lastIndex); 181 182 var global = this.global; 183 if (global) { 184 if (i < 0 || i > string.length) { 185 this.lastIndex = 0; 186 return null; 187 } 188 } else { 189 i = 0; 190 } 191 192 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 193 // matchIndices is either null or the lastMatchInfo array. 194 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 195 196 if (matchIndices === null) { 197 if (global) this.lastIndex = 0; 198 return null; 199 } 200 201 // Successful match. 202 lastMatchInfoOverride = null; 203 if (global) { 204 this.lastIndex = lastMatchInfo[CAPTURE1]; 205 } 206 return BuildResultFromMatchInfo(matchIndices, string); 207} 208 209 210// One-element cache for the simplified test regexp. 211var regexp_key; 212var regexp_val; 213 214// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 215// that test is defined in terms of String.prototype.exec. However, it probably 216// means the original value of String.prototype.exec, which is what everybody 217// else implements. 218function RegExpTest(string) { 219 if (!IS_REGEXP(this)) { 220 throw MakeTypeError('incompatible_method_receiver', 221 ['RegExp.prototype.test', this]); 222 } 223 if (%_ArgumentsLength() == 0) { 224 var regExpInput = LAST_INPUT(lastMatchInfo); 225 if (IS_UNDEFINED(regExpInput)) { 226 throw MakeError('no_input_to_regexp', [this]); 227 } 228 string = regExpInput; 229 } 230 231 string = TO_STRING_INLINE(string); 232 233 var lastIndex = this.lastIndex; 234 235 // Conversion is required by the ES5 specification (RegExp.prototype.exec 236 // algorithm, step 5) even if the value is discarded for non-global RegExps. 237 var i = TO_INTEGER(lastIndex); 238 239 if (this.global) { 240 if (i < 0 || i > string.length) { 241 this.lastIndex = 0; 242 return false; 243 } 244 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 245 // matchIndices is either null or the lastMatchInfo array. 246 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 247 if (matchIndices === null) { 248 this.lastIndex = 0; 249 return false; 250 } 251 lastMatchInfoOverride = null; 252 this.lastIndex = lastMatchInfo[CAPTURE1]; 253 return true; 254 } else { 255 // Non-global regexp. 256 // Remove irrelevant preceeding '.*' in a non-global test regexp. 257 // The expression checks whether this.source starts with '.*' and 258 // that the third char is not a '?'. 259 if (%_StringCharCodeAt(this.source, 0) == 46 && // '.' 260 %_StringCharCodeAt(this.source, 1) == 42 && // '*' 261 %_StringCharCodeAt(this.source, 2) != 63) { // '?' 262 if (!%_ObjectEquals(regexp_key, this)) { 263 regexp_key = this; 264 regexp_val = new $RegExp(SubString(this.source, 2, this.source.length), 265 (!this.ignoreCase 266 ? !this.multiline ? "" : "m" 267 : !this.multiline ? "i" : "im")); 268 } 269 if (%_RegExpExec(regexp_val, string, 0, lastMatchInfo) === null) { 270 return false; 271 } 272 } 273 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 274 // matchIndices is either null or the lastMatchInfo array. 275 var matchIndices = %_RegExpExec(this, string, 0, lastMatchInfo); 276 if (matchIndices === null) return false; 277 lastMatchInfoOverride = null; 278 return true; 279 } 280} 281 282 283function RegExpToString() { 284 // If this.source is an empty string, output /(?:)/. 285 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 286 // ecma_2/RegExp/properties-001.js. 287 var src = this.source ? this.source : '(?:)'; 288 var result = '/' + src + '/'; 289 if (this.global) result += 'g'; 290 if (this.ignoreCase) result += 'i'; 291 if (this.multiline) result += 'm'; 292 return result; 293} 294 295 296// Getters for the static properties lastMatch, lastParen, leftContext, and 297// rightContext of the RegExp constructor. The properties are computed based 298// on the captures array of the last successful match and the subject string 299// of the last successful match. 300function RegExpGetLastMatch() { 301 if (lastMatchInfoOverride !== null) { 302 return lastMatchInfoOverride[0]; 303 } 304 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 305 return SubString(regExpSubject, 306 lastMatchInfo[CAPTURE0], 307 lastMatchInfo[CAPTURE1]); 308} 309 310 311function RegExpGetLastParen() { 312 if (lastMatchInfoOverride) { 313 var override = lastMatchInfoOverride; 314 if (override.length <= 3) return ''; 315 return override[override.length - 3]; 316 } 317 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 318 if (length <= 2) return ''; // There were no captures. 319 // We match the SpiderMonkey behavior: return the substring defined by the 320 // last pair (after the first pair) of elements of the capture array even if 321 // it is empty. 322 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 323 var start = lastMatchInfo[CAPTURE(length - 2)]; 324 var end = lastMatchInfo[CAPTURE(length - 1)]; 325 if (start != -1 && end != -1) { 326 return SubString(regExpSubject, start, end); 327 } 328 return ""; 329} 330 331 332function RegExpGetLeftContext() { 333 var start_index; 334 var subject; 335 if (!lastMatchInfoOverride) { 336 start_index = lastMatchInfo[CAPTURE0]; 337 subject = LAST_SUBJECT(lastMatchInfo); 338 } else { 339 var override = lastMatchInfoOverride; 340 start_index = override[override.length - 2]; 341 subject = override[override.length - 1]; 342 } 343 return SubString(subject, 0, start_index); 344} 345 346 347function RegExpGetRightContext() { 348 var start_index; 349 var subject; 350 if (!lastMatchInfoOverride) { 351 start_index = lastMatchInfo[CAPTURE1]; 352 subject = LAST_SUBJECT(lastMatchInfo); 353 } else { 354 var override = lastMatchInfoOverride; 355 subject = override[override.length - 1]; 356 start_index = override[override.length - 2] + subject.length; 357 } 358 return SubString(subject, start_index, subject.length); 359} 360 361 362// The properties $1..$9 are the first nine capturing substrings of the last 363// successful match, or ''. The function RegExpMakeCaptureGetter will be 364// called with indices from 1 to 9. 365function RegExpMakeCaptureGetter(n) { 366 return function() { 367 if (lastMatchInfoOverride) { 368 if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n]; 369 return ''; 370 } 371 var index = n * 2; 372 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 373 var matchStart = lastMatchInfo[CAPTURE(index)]; 374 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 375 if (matchStart == -1 || matchEnd == -1) return ''; 376 return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 377 }; 378} 379 380 381// Property of the builtins object for recording the result of the last 382// regexp match. The property lastMatchInfo includes the matchIndices 383// array of the last successful regexp match (an array of start/end index 384// pairs for the match and all the captured substrings), the invariant is 385// that there are at least two capture indeces. The array also contains 386// the subject string for the last successful match. 387var lastMatchInfo = new InternalArray( 388 2, // REGEXP_NUMBER_OF_CAPTURES 389 "", // Last subject. 390 void 0, // Last input - settable with RegExpSetInput. 391 0, // REGEXP_FIRST_CAPTURE + 0 392 0 // REGEXP_FIRST_CAPTURE + 1 393); 394 395// Override last match info with an array of actual substrings. 396// Used internally by replace regexp with function. 397// The array has the format of an "apply" argument for a replacement 398// function. 399var lastMatchInfoOverride = null; 400 401// ------------------------------------------------------------------- 402 403function SetupRegExp() { 404 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 405 %FunctionSetPrototype($RegExp, new $Object()); 406 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 407 %SetCode($RegExp, RegExpConstructor); 408 409 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 410 "exec", RegExpExec, 411 "test", RegExpTest, 412 "toString", RegExpToString, 413 "compile", CompileRegExp 414 )); 415 416 // The length of compile is 1 in SpiderMonkey. 417 %FunctionSetLength($RegExp.prototype.compile, 1); 418 419 // The properties input, $input, and $_ are aliases for each other. When this 420 // value is set the value it is set to is coerced to a string. 421 // Getter and setter for the input. 422 function RegExpGetInput() { 423 var regExpInput = LAST_INPUT(lastMatchInfo); 424 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 425 } 426 function RegExpSetInput(string) { 427 LAST_INPUT(lastMatchInfo) = ToString(string); 428 }; 429 430 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); 431 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); 432 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 433 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 434 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 435 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 436 437 // The properties multiline and $* are aliases for each other. When this 438 // value is set in SpiderMonkey, the value it is set to is coerced to a 439 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 440 // the value of the expression 'RegExp.multiline = null' (for instance) is the 441 // boolean false (ie, the value after coercion), while in V8 it is the value 442 // null (ie, the value before coercion). 443 444 // Getter and setter for multiline. 445 var multiline = false; 446 function RegExpGetMultiline() { return multiline; }; 447 function RegExpSetMultiline(flag) { multiline = flag ? true : false; }; 448 449 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE); 450 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE); 451 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE); 452 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE); 453 454 455 function NoOpSetter(ignored) {} 456 457 458 // Static properties set by a successful match. 459 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE); 460 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE); 461 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE); 462 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 463 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE); 464 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE); 465 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE); 466 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 467 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE); 468 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE); 469 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE); 470 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 471 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE); 472 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE); 473 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE); 474 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 475 476 for (var i = 1; i < 10; ++i) { 477 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE); 478 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); 479 } 480} 481 482 483SetupRegExp(); 484