1// Copyright 2012 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// This file relies on the fact that the following declaration has been made 6// in runtime.js: 7// var $Object = global.Object; 8// var $Array = global.Array; 9 10var $RegExp = global.RegExp; 11 12// ------------------------------------------------------------------- 13 14// A recursive descent parser for Patterns according to the grammar of 15// ECMA-262 15.10.1, with deviations noted below. 16function DoConstructRegExp(object, pattern, flags) { 17 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 18 if (IS_REGEXP(pattern)) { 19 if (!IS_UNDEFINED(flags)) { 20 throw MakeTypeError('regexp_flags', []); 21 } 22 flags = (pattern.global ? 'g' : '') 23 + (pattern.ignoreCase ? 'i' : '') 24 + (pattern.multiline ? 'm' : ''); 25 pattern = pattern.source; 26 } 27 28 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 29 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 30 31 var global = false; 32 var ignoreCase = false; 33 var multiline = false; 34 for (var i = 0; i < flags.length; i++) { 35 var c = %_CallFunction(flags, i, StringCharAt); 36 switch (c) { 37 case 'g': 38 if (global) { 39 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 40 } 41 global = true; 42 break; 43 case 'i': 44 if (ignoreCase) { 45 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 46 } 47 ignoreCase = true; 48 break; 49 case 'm': 50 if (multiline) { 51 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 52 } 53 multiline = true; 54 break; 55 default: 56 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 57 } 58 } 59 60 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline); 61 62 // Call internal function to compile the pattern. 63 %RegExpCompile(object, pattern, flags); 64} 65 66 67function RegExpConstructor(pattern, flags) { 68 if (%_IsConstructCall()) { 69 DoConstructRegExp(this, pattern, flags); 70 } else { 71 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 72 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 73 return pattern; 74 } 75 return new $RegExp(pattern, flags); 76 } 77} 78 79// Deprecated RegExp.prototype.compile method. We behave like the constructor 80// were called again. In SpiderMonkey, this method returns the regexp object. 81// In JSC, it returns undefined. For compatibility with JSC, we match their 82// behavior. 83function RegExpCompileJS(pattern, flags) { 84 // Both JSC and SpiderMonkey treat a missing pattern argument as the 85 // empty subject string, and an actual undefined value passed as the 86 // pattern as the string 'undefined'. Note that JSC is inconsistent 87 // here, treating undefined values differently in 88 // RegExp.prototype.compile and in the constructor, where they are 89 // the empty string. For compatibility with JSC, we match their 90 // behavior. 91 if (this == $RegExp.prototype) { 92 // We don't allow recompiling RegExp.prototype. 93 throw MakeTypeError('incompatible_method_receiver', 94 ['RegExp.prototype.compile', this]); 95 } 96 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 97 DoConstructRegExp(this, 'undefined', flags); 98 } else { 99 DoConstructRegExp(this, pattern, flags); 100 } 101} 102 103 104function DoRegExpExec(regexp, string, index) { 105 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 106 if (result !== null) lastMatchInfoOverride = null; 107 return result; 108} 109 110 111// This is kind of performance sensitive, so we want to avoid unnecessary 112// type checks on inputs. But we also don't want to inline it several times 113// manually, so we use a macro :-) 114macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 115 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 116 var start = MATCHINFO[CAPTURE0]; 117 var end = MATCHINFO[CAPTURE1]; 118 // Calculate the substring of the first match before creating the result array 119 // to avoid an unnecessary write barrier storing the first result. 120 var first = %_SubString(STRING, start, end); 121 var result = %_RegExpConstructResult(numResults, start, STRING); 122 result[0] = first; 123 if (numResults == 1) return result; 124 var j = REGEXP_FIRST_CAPTURE + 2; 125 for (var i = 1; i < numResults; i++) { 126 start = MATCHINFO[j++]; 127 if (start != -1) { 128 end = MATCHINFO[j]; 129 result[i] = %_SubString(STRING, start, end); 130 } 131 j++; 132 } 133 return result; 134endmacro 135 136 137function RegExpExecNoTests(regexp, string, start) { 138 // Must be called with RegExp, string and positive integer as arguments. 139 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 140 if (matchInfo !== null) { 141 lastMatchInfoOverride = null; 142 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); 143 } 144 regexp.lastIndex = 0; 145 return null; 146} 147 148 149function RegExpExec(string) { 150 if (!IS_REGEXP(this)) { 151 throw MakeTypeError('incompatible_method_receiver', 152 ['RegExp.prototype.exec', this]); 153 } 154 155 string = TO_STRING_INLINE(string); 156 var lastIndex = this.lastIndex; 157 158 // Conversion is required by the ES5 specification (RegExp.prototype.exec 159 // algorithm, step 5) even if the value is discarded for non-global RegExps. 160 var i = TO_INTEGER(lastIndex); 161 162 var global = this.global; 163 if (global) { 164 if (i < 0 || i > string.length) { 165 this.lastIndex = 0; 166 return null; 167 } 168 } else { 169 i = 0; 170 } 171 172 // matchIndices is either null or the lastMatchInfo array. 173 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 174 175 if (IS_NULL(matchIndices)) { 176 this.lastIndex = 0; 177 return null; 178 } 179 180 // Successful match. 181 lastMatchInfoOverride = null; 182 if (global) { 183 this.lastIndex = lastMatchInfo[CAPTURE1]; 184 } 185 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); 186} 187 188 189// One-element cache for the simplified test regexp. 190var regexp_key; 191var regexp_val; 192 193// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 194// that test is defined in terms of String.prototype.exec. However, it probably 195// means the original value of String.prototype.exec, which is what everybody 196// else implements. 197function RegExpTest(string) { 198 if (!IS_REGEXP(this)) { 199 throw MakeTypeError('incompatible_method_receiver', 200 ['RegExp.prototype.test', this]); 201 } 202 string = TO_STRING_INLINE(string); 203 204 var lastIndex = this.lastIndex; 205 206 // Conversion is required by the ES5 specification (RegExp.prototype.exec 207 // algorithm, step 5) even if the value is discarded for non-global RegExps. 208 var i = TO_INTEGER(lastIndex); 209 210 if (this.global) { 211 if (i < 0 || i > string.length) { 212 this.lastIndex = 0; 213 return false; 214 } 215 // matchIndices is either null or the lastMatchInfo array. 216 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 217 if (IS_NULL(matchIndices)) { 218 this.lastIndex = 0; 219 return false; 220 } 221 lastMatchInfoOverride = null; 222 this.lastIndex = lastMatchInfo[CAPTURE1]; 223 return true; 224 } else { 225 // Non-global regexp. 226 // Remove irrelevant preceeding '.*' in a non-global test regexp. 227 // The expression checks whether this.source starts with '.*' and 228 // that the third char is not a '?'. 229 var regexp = this; 230 if (%_StringCharCodeAt(regexp.source, 0) == 46 && // '.' 231 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' 232 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' 233 regexp = TrimRegExp(regexp); 234 } 235 // matchIndices is either null or the lastMatchInfo array. 236 var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo); 237 if (IS_NULL(matchIndices)) { 238 this.lastIndex = 0; 239 return false; 240 } 241 lastMatchInfoOverride = null; 242 return true; 243 } 244} 245 246function TrimRegExp(regexp) { 247 if (!%_ObjectEquals(regexp_key, regexp)) { 248 regexp_key = regexp; 249 regexp_val = 250 new $RegExp(%_SubString(regexp.source, 2, regexp.source.length), 251 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" 252 : regexp.multiline ? "m" : "")); 253 } 254 return regexp_val; 255} 256 257 258function RegExpToString() { 259 if (!IS_REGEXP(this)) { 260 throw MakeTypeError('incompatible_method_receiver', 261 ['RegExp.prototype.toString', this]); 262 } 263 var result = '/' + this.source + '/'; 264 if (this.global) result += 'g'; 265 if (this.ignoreCase) result += 'i'; 266 if (this.multiline) result += 'm'; 267 return result; 268} 269 270 271// Getters for the static properties lastMatch, lastParen, leftContext, and 272// rightContext of the RegExp constructor. The properties are computed based 273// on the captures array of the last successful match and the subject string 274// of the last successful match. 275function RegExpGetLastMatch() { 276 if (lastMatchInfoOverride !== null) { 277 return OVERRIDE_MATCH(lastMatchInfoOverride); 278 } 279 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 280 return %_SubString(regExpSubject, 281 lastMatchInfo[CAPTURE0], 282 lastMatchInfo[CAPTURE1]); 283} 284 285 286function RegExpGetLastParen() { 287 if (lastMatchInfoOverride) { 288 var override = lastMatchInfoOverride; 289 if (override.length <= 3) return ''; 290 return override[override.length - 3]; 291 } 292 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 293 if (length <= 2) return ''; // There were no captures. 294 // We match the SpiderMonkey behavior: return the substring defined by the 295 // last pair (after the first pair) of elements of the capture array even if 296 // it is empty. 297 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 298 var start = lastMatchInfo[CAPTURE(length - 2)]; 299 var end = lastMatchInfo[CAPTURE(length - 1)]; 300 if (start != -1 && end != -1) { 301 return %_SubString(regExpSubject, start, end); 302 } 303 return ""; 304} 305 306 307function RegExpGetLeftContext() { 308 var start_index; 309 var subject; 310 if (!lastMatchInfoOverride) { 311 start_index = lastMatchInfo[CAPTURE0]; 312 subject = LAST_SUBJECT(lastMatchInfo); 313 } else { 314 var override = lastMatchInfoOverride; 315 start_index = OVERRIDE_POS(override); 316 subject = OVERRIDE_SUBJECT(override); 317 } 318 return %_SubString(subject, 0, start_index); 319} 320 321 322function RegExpGetRightContext() { 323 var start_index; 324 var subject; 325 if (!lastMatchInfoOverride) { 326 start_index = lastMatchInfo[CAPTURE1]; 327 subject = LAST_SUBJECT(lastMatchInfo); 328 } else { 329 var override = lastMatchInfoOverride; 330 subject = OVERRIDE_SUBJECT(override); 331 var match = OVERRIDE_MATCH(override); 332 start_index = OVERRIDE_POS(override) + match.length; 333 } 334 return %_SubString(subject, start_index, subject.length); 335} 336 337 338// The properties $1..$9 are the first nine capturing substrings of the last 339// successful match, or ''. The function RegExpMakeCaptureGetter will be 340// called with indices from 1 to 9. 341function RegExpMakeCaptureGetter(n) { 342 return function() { 343 if (lastMatchInfoOverride) { 344 if (n < lastMatchInfoOverride.length - 2) { 345 return OVERRIDE_CAPTURE(lastMatchInfoOverride, n); 346 } 347 return ''; 348 } 349 var index = n * 2; 350 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 351 var matchStart = lastMatchInfo[CAPTURE(index)]; 352 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 353 if (matchStart == -1 || matchEnd == -1) return ''; 354 return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 355 }; 356} 357 358 359// Property of the builtins object for recording the result of the last 360// regexp match. The property lastMatchInfo includes the matchIndices 361// array of the last successful regexp match (an array of start/end index 362// pairs for the match and all the captured substrings), the invariant is 363// that there are at least two capture indeces. The array also contains 364// the subject string for the last successful match. 365var lastMatchInfo = new InternalPackedArray( 366 2, // REGEXP_NUMBER_OF_CAPTURES 367 "", // Last subject. 368 UNDEFINED, // Last input - settable with RegExpSetInput. 369 0, // REGEXP_FIRST_CAPTURE + 0 370 0 // REGEXP_FIRST_CAPTURE + 1 371); 372 373// Override last match info with an array of actual substrings. 374// Used internally by replace regexp with function. 375// The array has the format of an "apply" argument for a replacement 376// function. 377var lastMatchInfoOverride = null; 378 379// ------------------------------------------------------------------- 380 381function SetUpRegExp() { 382 %CheckIsBootstrapping(); 383 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 384 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 385 %SetCode($RegExp, RegExpConstructor); 386 387 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 388 "exec", RegExpExec, 389 "test", RegExpTest, 390 "toString", RegExpToString, 391 "compile", RegExpCompileJS 392 )); 393 394 // The length of compile is 1 in SpiderMonkey. 395 %FunctionSetLength($RegExp.prototype.compile, 1); 396 397 // The properties input, $input, and $_ are aliases for each other. When this 398 // value is set the value it is set to is coerced to a string. 399 // Getter and setter for the input. 400 var RegExpGetInput = function() { 401 var regExpInput = LAST_INPUT(lastMatchInfo); 402 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 403 }; 404 var RegExpSetInput = function(string) { 405 LAST_INPUT(lastMatchInfo) = ToString(string); 406 }; 407 408 %OptimizeObjectForAddingMultipleProperties($RegExp, 22); 409 %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput, 410 RegExpSetInput, DONT_DELETE); 411 %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput, 412 RegExpSetInput, DONT_ENUM | DONT_DELETE); 413 %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput, 414 RegExpSetInput, DONT_ENUM | DONT_DELETE); 415 416 // The properties multiline and $* are aliases for each other. When this 417 // value is set in SpiderMonkey, the value it is set to is coerced to a 418 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 419 // the value of the expression 'RegExp.multiline = null' (for instance) is the 420 // boolean false (i.e., the value after coercion), while in V8 it is the value 421 // null (i.e., the value before coercion). 422 423 // Getter and setter for multiline. 424 var multiline = false; 425 var RegExpGetMultiline = function() { return multiline; }; 426 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; 427 428 %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline, 429 RegExpSetMultiline, DONT_DELETE); 430 %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline, 431 RegExpSetMultiline, 432 DONT_ENUM | DONT_DELETE); 433 434 435 var NoOpSetter = function(ignored) {}; 436 437 438 // Static properties set by a successful match. 439 %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch, 440 NoOpSetter, DONT_DELETE); 441 %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch, 442 NoOpSetter, DONT_ENUM | DONT_DELETE); 443 %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen, 444 NoOpSetter, DONT_DELETE); 445 %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen, 446 NoOpSetter, DONT_ENUM | DONT_DELETE); 447 %DefineOrRedefineAccessorProperty($RegExp, 'leftContext', 448 RegExpGetLeftContext, NoOpSetter, 449 DONT_DELETE); 450 %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext, 451 NoOpSetter, DONT_ENUM | DONT_DELETE); 452 %DefineOrRedefineAccessorProperty($RegExp, 'rightContext', 453 RegExpGetRightContext, NoOpSetter, 454 DONT_DELETE); 455 %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext, 456 NoOpSetter, DONT_ENUM | DONT_DELETE); 457 458 for (var i = 1; i < 10; ++i) { 459 %DefineOrRedefineAccessorProperty($RegExp, '$' + i, 460 RegExpMakeCaptureGetter(i), NoOpSetter, 461 DONT_DELETE); 462 } 463 %ToFastProperties($RegExp); 464} 465 466SetUpRegExp(); 467