1// Copyright 2006-2009 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28// Expect $Object = global.Object; 29// Expect $Array = global.Array; 30 31const $RegExp = global.RegExp; 32 33// A recursive descent parser for Patterns according to the grammar of 34// ECMA-262 15.10.1, with deviations noted below. 35function DoConstructRegExp(object, pattern, flags, isConstructorCall) { 36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 
37 if (IS_REGEXP(pattern)) { 38 if (!IS_UNDEFINED(flags)) { 39 throw MakeTypeError('regexp_flags', []); 40 } 41 flags = (pattern.global ? 'g' : '') 42 + (pattern.ignoreCase ? 'i' : '') 43 + (pattern.multiline ? 'm' : ''); 44 pattern = pattern.source; 45 } 46 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 49 50 var global = false; 51 var ignoreCase = false; 52 var multiline = false; 53 54 for (var i = 0; i < flags.length; i++) { 55 var c = StringCharAt.call(flags, i); 56 switch (c) { 57 case 'g': 58 // Allow duplicate flags to be consistent with JSC and others. 59 global = true; 60 break; 61 case 'i': 62 ignoreCase = true; 63 break; 64 case 'm': 65 multiline = true; 66 break; 67 default: 68 // Ignore flags that have no meaning to be consistent with 69 // JSC. 70 break; 71 } 72 } 73 74 if (isConstructorCall) { 75 // ECMA-262, section 15.10.7.1. 76 %SetProperty(object, 'source', pattern, 77 DONT_DELETE | READ_ONLY | DONT_ENUM); 78 79 // ECMA-262, section 15.10.7.2. 80 %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM); 81 82 // ECMA-262, section 15.10.7.3. 83 %SetProperty(object, 'ignoreCase', ignoreCase, 84 DONT_DELETE | READ_ONLY | DONT_ENUM); 85 86 // ECMA-262, section 15.10.7.4. 87 %SetProperty(object, 'multiline', multiline, 88 DONT_DELETE | READ_ONLY | DONT_ENUM); 89 90 // ECMA-262, section 15.10.7.5. 91 %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM); 92 } else { // RegExp is being recompiled via RegExp.prototype.compile. 93 %IgnoreAttributesAndSetProperty(object, 'source', pattern); 94 %IgnoreAttributesAndSetProperty(object, 'global', global); 95 %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase); 96 %IgnoreAttributesAndSetProperty(object, 'multiline', multiline); 97 %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0); 98 } 99 100 // Call internal function to compile the pattern. 
101 %RegExpCompile(object, pattern, flags); 102} 103 104 105function RegExpConstructor(pattern, flags) { 106 if (%_IsConstructCall()) { 107 DoConstructRegExp(this, pattern, flags, true); 108 } else { 109 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 111 return pattern; 112 } 113 return new $RegExp(pattern, flags); 114 } 115} 116 117 118// Deprecated RegExp.prototype.compile method. We behave like the constructor 119// were called again. In SpiderMonkey, this method returns the regexp object. 120// In JSC, it returns undefined. For compatibility with JSC, we match their 121// behavior. 122function CompileRegExp(pattern, flags) { 123 // Both JSC and SpiderMonkey treat a missing pattern argument as the 124 // empty subject string, and an actual undefined value passed as the 125 // pattern as the string 'undefined'. Note that JSC is inconsistent 126 // here, treating undefined values differently in 127 // RegExp.prototype.compile and in the constructor, where they are 128 // the empty string. For compatibility with JSC, we match their 129 // behavior. 130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 131 DoConstructRegExp(this, 'undefined', flags, false); 132 } else { 133 DoConstructRegExp(this, pattern, flags, false); 134 } 135} 136 137 138function DoRegExpExec(regexp, string, index) { 139 return %RegExpExec(regexp, string, index, lastMatchInfo); 140} 141 142 143function DoRegExpExecGlobal(regexp, string) { 144 // Returns an array of arrays of substring indices. 
145 return %RegExpExecGlobal(regexp, string, lastMatchInfo); 146} 147 148 149function RegExpExec(string) { 150 if (!IS_REGEXP(this)) { 151 throw MakeTypeError('method_called_on_incompatible', 152 ['RegExp.prototype.exec', this]); 153 } 154 if (%_ArgumentsLength() == 0) { 155 var regExpInput = LAST_INPUT(lastMatchInfo); 156 if (IS_UNDEFINED(regExpInput)) { 157 throw MakeError('no_input_to_regexp', [this]); 158 } 159 string = regExpInput; 160 } 161 var s = ToString(string); 162 var length = s.length; 163 var lastIndex = this.lastIndex; 164 var i = this.global ? TO_INTEGER(lastIndex) : 0; 165 166 if (i < 0 || i > s.length) { 167 this.lastIndex = 0; 168 return null; 169 } 170 171 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 172 // matchIndices is either null or the lastMatchInfo array. 173 var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); 174 175 if (matchIndices == null) { 176 if (this.global) this.lastIndex = 0; 177 return matchIndices; // no match 178 } 179 180 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 181 var result = new $Array(numResults); 182 for (var i = 0; i < numResults; i++) { 183 var matchStart = lastMatchInfo[CAPTURE(i << 1)]; 184 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; 185 if (matchStart != -1 && matchEnd != -1) { 186 result[i] = SubString(s, matchStart, matchEnd); 187 } else { 188 // Make sure the element is present. Avoid reading the undefined 189 // property from the global object since this may change. 190 result[i] = void 0; 191 } 192 } 193 194 if (this.global) 195 this.lastIndex = lastMatchInfo[CAPTURE1]; 196 result.index = lastMatchInfo[CAPTURE0]; 197 result.input = s; 198 return result; 199} 200 201 202// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 203// that test is defined in terms of String.prototype.exec. However, it probably 204// means the original value of String.prototype.exec, which is what everybody 205// else implements. 
206function RegExpTest(string) { 207 if (!IS_REGEXP(this)) { 208 throw MakeTypeError('method_called_on_incompatible', 209 ['RegExp.prototype.test', this]); 210 } 211 if (%_ArgumentsLength() == 0) { 212 var regExpInput = LAST_INPUT(lastMatchInfo); 213 if (IS_UNDEFINED(regExpInput)) { 214 throw MakeError('no_input_to_regexp', [this]); 215 } 216 string = regExpInput; 217 } 218 var s = ToString(string); 219 var length = s.length; 220 var lastIndex = this.lastIndex; 221 var i = this.global ? TO_INTEGER(lastIndex) : 0; 222 223 if (i < 0 || i > s.length) { 224 this.lastIndex = 0; 225 return false; 226 } 227 228 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 229 // matchIndices is either null or the lastMatchInfo array. 230 var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); 231 232 if (matchIndices == null) { 233 if (this.global) this.lastIndex = 0; 234 return false; 235 } 236 237 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; 238 return true; 239} 240 241 242function RegExpToString() { 243 // If this.source is an empty string, output /(?:)/. 244 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 245 // ecma_2/RegExp/properties-001.js. 246 var src = this.source ? this.source : '(?:)'; 247 var result = '/' + src + '/'; 248 if (this.global) 249 result += 'g'; 250 if (this.ignoreCase) 251 result += 'i'; 252 if (this.multiline) 253 result += 'm'; 254 return result; 255} 256 257 258// Getters for the static properties lastMatch, lastParen, leftContext, and 259// rightContext of the RegExp constructor. The properties are computed based 260// on the captures array of the last successful match and the subject string 261// of the last successful match. 
function RegExpGetLastMatch() {
  // The first capture pair delimits the complete match within the subject.
  return SubString(LAST_SUBJECT(lastMatchInfo),
                   lastMatchInfo[CAPTURE0],
                   lastMatchInfo[CAPTURE1]);
}


function RegExpGetLastParen() {
  var captureCount = NUMBER_OF_CAPTURES(lastMatchInfo);
  // Only the pair for the complete match is recorded: there were no captures.
  if (captureCount <= 2) return '';
  // Like SpiderMonkey, report the substring described by the final pair of
  // the capture array (beyond the first pair), even when it is empty.
  var subject = LAST_SUBJECT(lastMatchInfo);
  var from = lastMatchInfo[CAPTURE(captureCount - 2)];
  var to = lastMatchInfo[CAPTURE(captureCount - 1)];
  if (from == -1 || to == -1) {
    return "";
  }
  return SubString(subject, from, to);
}


function RegExpGetLeftContext() {
  // Everything in the last subject that precedes the start of the match.
  var subject = LAST_SUBJECT(lastMatchInfo);
  var matchStart = lastMatchInfo[CAPTURE0];
  return SubString(subject, 0, matchStart);
}


function RegExpGetRightContext() {
  // Everything in the last subject that follows the end of the match.
  var lastSubject = LAST_SUBJECT(lastMatchInfo);
  var matchEnd = lastMatchInfo[CAPTURE1];
  return SubString(lastSubject, matchEnd, lastSubject.length);
}


// The properties $1..$9 are the first nine capturing substrings of the last
// successful match, or ''.  The function RegExpMakeCaptureGetter will be
// called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) {
  return function() {
    // Capture n occupies the slots 2n (start) and 2n + 1 (end).
    var startSlot = n * 2;
    if (startSlot >= NUMBER_OF_CAPTURES(lastMatchInfo)) {
      return '';
    }
    var from = lastMatchInfo[CAPTURE(startSlot)];
    var to = lastMatchInfo[CAPTURE(startSlot + 1)];
    if (from != -1 && to != -1) {
      return SubString(LAST_SUBJECT(lastMatchInfo), from, to);
    }
    return '';
  };
}


// Property of the builtins object for recording the result of the last
// regexp match.
The property lastMatchInfo includes the matchIndices 318// array of the last successful regexp match (an array of start/end index 319// pairs for the match and all the captured substrings), the invariant is 320// that there are at least two capture indeces. The array also contains 321// the subject string for the last successful match. 322var lastMatchInfo = [ 323 2, // REGEXP_NUMBER_OF_CAPTURES 324 "", // Last subject. 325 void 0, // Last input - settable with RegExpSetInput. 326 0, // REGEXP_FIRST_CAPTURE + 0 327 0, // REGEXP_FIRST_CAPTURE + 1 328]; 329 330// ------------------------------------------------------------------- 331 332function SetupRegExp() { 333 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 334 %FunctionSetPrototype($RegExp, new $Object()); 335 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 336 %SetCode($RegExp, RegExpConstructor); 337 338 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 339 "exec", RegExpExec, 340 "test", RegExpTest, 341 "toString", RegExpToString, 342 "compile", CompileRegExp 343 )); 344 345 // The length of compile is 1 in SpiderMonkey. 346 %FunctionSetLength($RegExp.prototype.compile, 1); 347 348 // The properties input, $input, and $_ are aliases for each other. When this 349 // value is set the value it is set to is coerced to a string. 350 // Getter and setter for the input. 351 function RegExpGetInput() { 352 var regExpInput = LAST_INPUT(lastMatchInfo); 353 return IS_UNDEFINED(regExpInput) ? 
"" : regExpInput; 354 } 355 function RegExpSetInput(string) { 356 LAST_INPUT(lastMatchInfo) = ToString(string); 357 }; 358 359 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); 360 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); 361 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 362 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 363 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 364 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 365 366 // The properties multiline and $* are aliases for each other. When this 367 // value is set in SpiderMonkey, the value it is set to is coerced to a 368 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 369 // the value of the expression 'RegExp.multiline = null' (for instance) is the 370 // boolean false (ie, the value after coercion), while in V8 it is the value 371 // null (ie, the value before coercion). 372 373 // Getter and setter for multiline. 374 var multiline = false; 375 function RegExpGetMultiline() { return multiline; }; 376 function RegExpSetMultiline(flag) { multiline = flag ? true : false; }; 377 378 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE); 379 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE); 380 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE); 381 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE); 382 383 384 function NoOpSetter(ignored) {} 385 386 387 // Static properties set by a successful match. 
388 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE); 389 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE); 390 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE); 391 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 392 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE); 393 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE); 394 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE); 395 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 396 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE); 397 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE); 398 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE); 399 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 400 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE); 401 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE); 402 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE); 403 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 404 405 for (var i = 1; i < 10; ++i) { 406 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE); 407 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); 408 } 409} 410 411 412SetupRegExp(); 413