1// Copyright 2006-2009 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28// Expect $Object = global.Object; 29// Expect $Array = global.Array; 30 31const $RegExp = global.RegExp; 32 33// A recursive descent parser for Patterns according to the grammar of 34// ECMA-262 15.10.1, with deviations noted below. 35function DoConstructRegExp(object, pattern, flags, isConstructorCall) { 36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 37 if (IS_REGEXP(pattern)) { 38 if (!IS_UNDEFINED(flags)) { 39 throw MakeTypeError('regexp_flags', []); 40 } 41 flags = (pattern.global ? 'g' : '') 42 + (pattern.ignoreCase ? 'i' : '') 43 + (pattern.multiline ? 'm' : ''); 44 pattern = pattern.source; 45 } 46 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 49 50 var global = false; 51 var ignoreCase = false; 52 var multiline = false; 53 54 for (var i = 0; i < flags.length; i++) { 55 var c = StringCharAt.call(flags, i); 56 switch (c) { 57 case 'g': 58 // Allow duplicate flags to be consistent with JSC and others. 59 global = true; 60 break; 61 case 'i': 62 ignoreCase = true; 63 break; 64 case 'm': 65 multiline = true; 66 break; 67 default: 68 // Ignore flags that have no meaning to be consistent with 69 // JSC. 70 break; 71 } 72 } 73 74 if (isConstructorCall) { 75 // ECMA-262, section 15.10.7.1. 76 %SetProperty(object, 'source', pattern, 77 DONT_DELETE | READ_ONLY | DONT_ENUM); 78 79 // ECMA-262, section 15.10.7.2. 80 %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM); 81 82 // ECMA-262, section 15.10.7.3. 83 %SetProperty(object, 'ignoreCase', ignoreCase, 84 DONT_DELETE | READ_ONLY | DONT_ENUM); 85 86 // ECMA-262, section 15.10.7.4. 87 %SetProperty(object, 'multiline', multiline, 88 DONT_DELETE | READ_ONLY | DONT_ENUM); 89 90 // ECMA-262, section 15.10.7.5. 91 %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM); 92 } else { // RegExp is being recompiled via RegExp.prototype.compile. 93 %IgnoreAttributesAndSetProperty(object, 'source', pattern); 94 %IgnoreAttributesAndSetProperty(object, 'global', global); 95 %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase); 96 %IgnoreAttributesAndSetProperty(object, 'multiline', multiline); 97 %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0); 98 } 99 100 // Call internal function to compile the pattern. 101 %RegExpCompile(object, pattern, flags); 102} 103 104 105function RegExpConstructor(pattern, flags) { 106 if (%_IsConstructCall()) { 107 DoConstructRegExp(this, pattern, flags, true); 108 } else { 109 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 111 return pattern; 112 } 113 return new $RegExp(pattern, flags); 114 } 115} 116 117 118// Deprecated RegExp.prototype.compile method. We behave like the constructor 119// were called again. In SpiderMonkey, this method returns the regexp object. 120// In JSC, it returns undefined. For compatibility with JSC, we match their 121// behavior. 122function CompileRegExp(pattern, flags) { 123 // Both JSC and SpiderMonkey treat a missing pattern argument as the 124 // empty subject string, and an actual undefined value passed as the 125 // pattern as the string 'undefined'. Note that JSC is inconsistent 126 // here, treating undefined values differently in 127 // RegExp.prototype.compile and in the constructor, where they are 128 // the empty string. For compatibility with JSC, we match their 129 // behavior. 130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 131 DoConstructRegExp(this, 'undefined', flags, false); 132 } else { 133 DoConstructRegExp(this, pattern, flags, false); 134 } 135} 136 137 138function DoRegExpExec(regexp, string, index) { 139 return %_RegExpExec(regexp, string, index, lastMatchInfo); 140} 141 142 143function RegExpExec(string) { 144 if (!IS_REGEXP(this)) { 145 throw MakeTypeError('method_called_on_incompatible', 146 ['RegExp.prototype.exec', this]); 147 } 148 if (%_ArgumentsLength() == 0) { 149 var regExpInput = LAST_INPUT(lastMatchInfo); 150 if (IS_UNDEFINED(regExpInput)) { 151 throw MakeError('no_input_to_regexp', [this]); 152 } 153 string = regExpInput; 154 } 155 var s = ToString(string); 156 var length = s.length; 157 var lastIndex = this.lastIndex; 158 var i = this.global ? TO_INTEGER(lastIndex) : 0; 159 160 if (i < 0 || i > s.length) { 161 this.lastIndex = 0; 162 return null; 163 } 164 165 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 166 // matchIndices is either null or the lastMatchInfo array. 167 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); 168 169 if (matchIndices == null) { 170 if (this.global) this.lastIndex = 0; 171 return matchIndices; // no match 172 } 173 174 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 175 var result = new $Array(numResults); 176 for (var i = 0; i < numResults; i++) { 177 var matchStart = lastMatchInfo[CAPTURE(i << 1)]; 178 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; 179 if (matchStart != -1 && matchEnd != -1) { 180 result[i] = SubString(s, matchStart, matchEnd); 181 } else { 182 // Make sure the element is present. Avoid reading the undefined 183 // property from the global object since this may change. 184 result[i] = void 0; 185 } 186 } 187 188 if (this.global) 189 this.lastIndex = lastMatchInfo[CAPTURE1]; 190 result.index = lastMatchInfo[CAPTURE0]; 191 result.input = s; 192 return result; 193} 194 195 196// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 197// that test is defined in terms of String.prototype.exec. However, it probably 198// means the original value of String.prototype.exec, which is what everybody 199// else implements. 200function RegExpTest(string) { 201 if (!IS_REGEXP(this)) { 202 throw MakeTypeError('method_called_on_incompatible', 203 ['RegExp.prototype.test', this]); 204 } 205 if (%_ArgumentsLength() == 0) { 206 var regExpInput = LAST_INPUT(lastMatchInfo); 207 if (IS_UNDEFINED(regExpInput)) { 208 throw MakeError('no_input_to_regexp', [this]); 209 } 210 string = regExpInput; 211 } 212 var s = ToString(string); 213 var length = s.length; 214 var lastIndex = this.lastIndex; 215 var i = this.global ? TO_INTEGER(lastIndex) : 0; 216 217 if (i < 0 || i > s.length) { 218 this.lastIndex = 0; 219 return false; 220 } 221 222 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 223 // matchIndices is either null or the lastMatchInfo array. 224 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); 225 226 if (matchIndices == null) { 227 if (this.global) this.lastIndex = 0; 228 return false; 229 } 230 231 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; 232 return true; 233} 234 235 236function RegExpToString() { 237 // If this.source is an empty string, output /(?:)/. 238 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 239 // ecma_2/RegExp/properties-001.js. 240 var src = this.source ? this.source : '(?:)'; 241 var result = '/' + src + '/'; 242 if (this.global) 243 result += 'g'; 244 if (this.ignoreCase) 245 result += 'i'; 246 if (this.multiline) 247 result += 'm'; 248 return result; 249} 250 251 252// Getters for the static properties lastMatch, lastParen, leftContext, and 253// rightContext of the RegExp constructor. The properties are computed based 254// on the captures array of the last successful match and the subject string 255// of the last successful match. 256function RegExpGetLastMatch() { 257 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 258 return SubString(regExpSubject, 259 lastMatchInfo[CAPTURE0], 260 lastMatchInfo[CAPTURE1]); 261} 262 263 264function RegExpGetLastParen() { 265 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 266 if (length <= 2) return ''; // There were no captures. 267 // We match the SpiderMonkey behavior: return the substring defined by the 268 // last pair (after the first pair) of elements of the capture array even if 269 // it is empty. 270 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 271 var start = lastMatchInfo[CAPTURE(length - 2)]; 272 var end = lastMatchInfo[CAPTURE(length - 1)]; 273 if (start != -1 && end != -1) { 274 return SubString(regExpSubject, start, end); 275 } 276 return ""; 277} 278 279 280function RegExpGetLeftContext() { 281 return SubString(LAST_SUBJECT(lastMatchInfo), 282 0, 283 lastMatchInfo[CAPTURE0]); 284} 285 286 287function RegExpGetRightContext() { 288 var subject = LAST_SUBJECT(lastMatchInfo); 289 return SubString(subject, 290 lastMatchInfo[CAPTURE1], 291 subject.length); 292} 293 294 295// The properties $1..$9 are the first nine capturing substrings of the last 296// successful match, or ''. The function RegExpMakeCaptureGetter will be 297// called with indices from 1 to 9. 298function RegExpMakeCaptureGetter(n) { 299 return function() { 300 var index = n * 2; 301 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 302 var matchStart = lastMatchInfo[CAPTURE(index)]; 303 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 304 if (matchStart == -1 || matchEnd == -1) return ''; 305 return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 306 }; 307} 308 309 310// Property of the builtins object for recording the result of the last 311// regexp match. The property lastMatchInfo includes the matchIndices 312// array of the last successful regexp match (an array of start/end index 313// pairs for the match and all the captured substrings), the invariant is 314// that there are at least two capture indeces. The array also contains 315// the subject string for the last successful match. 316var lastMatchInfo = [ 317 2, // REGEXP_NUMBER_OF_CAPTURES 318 "", // Last subject. 319 void 0, // Last input - settable with RegExpSetInput. 320 0, // REGEXP_FIRST_CAPTURE + 0 321 0, // REGEXP_FIRST_CAPTURE + 1 322]; 323 324// ------------------------------------------------------------------- 325 326function SetupRegExp() { 327 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 328 %FunctionSetPrototype($RegExp, new $Object()); 329 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 330 %SetCode($RegExp, RegExpConstructor); 331 332 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 333 "exec", RegExpExec, 334 "test", RegExpTest, 335 "toString", RegExpToString, 336 "compile", CompileRegExp 337 )); 338 339 // The length of compile is 1 in SpiderMonkey. 340 %FunctionSetLength($RegExp.prototype.compile, 1); 341 342 // The properties input, $input, and $_ are aliases for each other. When this 343 // value is set the value it is set to is coerced to a string. 344 // Getter and setter for the input. 345 function RegExpGetInput() { 346 var regExpInput = LAST_INPUT(lastMatchInfo); 347 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 348 } 349 function RegExpSetInput(string) { 350 LAST_INPUT(lastMatchInfo) = ToString(string); 351 }; 352 353 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); 354 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); 355 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 356 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 357 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 358 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 359 360 // The properties multiline and $* are aliases for each other. When this 361 // value is set in SpiderMonkey, the value it is set to is coerced to a 362 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 363 // the value of the expression 'RegExp.multiline = null' (for instance) is the 364 // boolean false (ie, the value after coercion), while in V8 it is the value 365 // null (ie, the value before coercion). 366 367 // Getter and setter for multiline. 368 var multiline = false; 369 function RegExpGetMultiline() { return multiline; }; 370 function RegExpSetMultiline(flag) { multiline = flag ? true : false; }; 371 372 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE); 373 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE); 374 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE); 375 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE); 376 377 378 function NoOpSetter(ignored) {} 379 380 381 // Static properties set by a successful match. 382 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE); 383 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE); 384 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE); 385 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 386 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE); 387 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE); 388 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE); 389 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 390 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE); 391 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE); 392 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE); 393 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 394 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE); 395 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE); 396 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE); 397 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 398 399 for (var i = 1; i < 10; ++i) { 400 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE); 401 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); 402 } 403} 404 405 406SetupRegExp(); 407