// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

(function(global, utils) {

%CheckIsBootstrapping();

// -------------------------------------------------------------------
// Imports

var ExpandReplacement;
var GlobalArray = global.Array;
var GlobalObject = global.Object;
var GlobalRegExp = global.RegExp;
var GlobalRegExpPrototype;
var InternalArray = utils.InternalArray;
var InternalPackedArray = utils.InternalPackedArray;
var MakeTypeError;
var MaxSimple;
var MinSimple;
var matchSymbol = utils.ImportNow("match_symbol");
var replaceSymbol = utils.ImportNow("replace_symbol");
var searchSymbol = utils.ImportNow("search_symbol");
var speciesSymbol = utils.ImportNow("species_symbol");
var splitSymbol = utils.ImportNow("split_symbol");
var SpeciesConstructor;

// These bindings are filled in by the callback below rather than at
// declaration time.
utils.Import(function(from) {
  ExpandReplacement = from.ExpandReplacement;
  MakeTypeError = from.MakeTypeError;
  MaxSimple = from.MaxSimple;
  MinSimple = from.MinSimple;
  SpeciesConstructor = from.SpeciesConstructor;
});

// -------------------------------------------------------------------

// Property of the builtins object for recording the result of the last
// regexp match. The property RegExpLastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings), the invariant is
// that there are at least two capture indices. The array also contains
// the subject string for the last successful match.
var RegExpLastMatchInfo = new InternalPackedArray(
 2,                 // REGEXP_NUMBER_OF_CAPTURES
 "",                // Last subject.
 UNDEFINED,         // Last input - settable with RegExpSetInput.
 0,                 // REGEXP_FIRST_CAPTURE + 0
 0                  // REGEXP_FIRST_CAPTURE + 1
);

// -------------------------------------------------------------------

// ES#sec-isregexp IsRegExp ( argument )
// Returns false for non-receivers. Otherwise a non-undefined o[matchSymbol]
// decides the answer (coerced to boolean); only when that property is
// undefined does the IS_REGEXP check decide.
function IsRegExp(o) {
  if (!IS_RECEIVER(o)) return false;
  var is_regexp = o[matchSymbol];
  if (!IS_UNDEFINED(is_regexp)) return TO_BOOLEAN(is_regexp);
  return IS_REGEXP(o);
}


// ES#sec-regexpinitialize
// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
// Undefined pattern/flags become the empty string; anything else is
// converted with TO_STRING (pattern first, then flags) before the object is
// initialized and compiled. Returns the same object that was passed in.
function RegExpInitialize(object, pattern, flags) {
  pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
  flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
  %RegExpInitializeAndCompile(object, pattern, flags);
  return object;
}


// Builds a flags string from the internal flag bits of |pattern|, always in
// the order g, i, m, u, y. It does not read the (patchable) flag properties.
function PatternFlags(pattern) {
  return (REGEXP_GLOBAL(pattern) ? 'g' : '') +
         (REGEXP_IGNORE_CASE(pattern) ? 'i' : '') +
         (REGEXP_MULTILINE(pattern) ? 'm' : '') +
         (REGEXP_UNICODE(pattern) ? 'u' : '') +
         (REGEXP_STICKY(pattern) ? 'y' : '');
}


// ES#sec-regexp-pattern-flags
// RegExp ( pattern, flags )
// Works both as a constructor and as a plain call. When called without
// new.target, a regexp-like pattern whose constructor is GlobalRegExp is
// returned unchanged if no flags were given.
function RegExpConstructor(pattern, flags) {
  var newtarget = new.target;
  var pattern_is_regexp = IsRegExp(pattern);

  if (IS_UNDEFINED(newtarget)) {
    newtarget = GlobalRegExp;

    // ES6 section 21.2.3.1 step 3.b
    if (pattern_is_regexp && IS_UNDEFINED(flags) &&
        pattern.constructor === newtarget) {
      return pattern;
    }
  }

  if (IS_REGEXP(pattern)) {
    // Real RegExp: take source and flags from the internal state.
    if (IS_UNDEFINED(flags)) flags = PatternFlags(pattern);
    pattern = REGEXP_SOURCE(pattern);

  } else if (pattern_is_regexp) {
    // Regexp-like object: read the observable 'source' first, then 'flags'
    // (the latter only when no flags argument was supplied).
    var input_pattern = pattern;
    pattern = pattern.source;
    if (IS_UNDEFINED(flags)) flags = input_pattern.flags;
  }

  var object = %_NewObject(GlobalRegExp, newtarget);
  return RegExpInitialize(object, pattern, flags);
}


// ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags)
// Re-initializes the receiver in place. Throws a TypeError when the receiver
// is not a RegExp, or when |pattern| is a RegExp and |flags| is also given.
function RegExpCompileJS(pattern, flags) {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        "RegExp.prototype.compile", this);
  }

  if (IS_REGEXP(pattern)) {
    if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags);

    flags = PatternFlags(pattern);
    pattern = REGEXP_SOURCE(pattern);
  }

  RegExpInitialize(this, pattern, flags);

  // Return undefined for compatibility with JSC.
  // See http://crbug.com/585775 for web compat details.
}


// Thin wrapper around %_RegExpExec that always passes RegExpLastMatchInfo as
// the match-info array. Returns whatever %_RegExpExec returns; callers in
// this file compare the result against null.
function DoRegExpExec(regexp, string, index) {
  return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
}


// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
// NOTE: the macro expands to statements that declare locals (numResults,
// start, end, first, result, j, i) and execute `return`, so it can only be
// used as the final statement of a function path.
macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
  var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
  var start = MATCHINFO[CAPTURE0];
  var end = MATCHINFO[CAPTURE1];
  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
  var first = %_SubString(STRING, start, end);
  var result = %_RegExpConstructResult(numResults, start, STRING);
  result[0] = first;
  if (numResults == 1) return result;
  var j = REGEXP_FIRST_CAPTURE + 2;
  for (var i = 1; i < numResults; i++) {
    start = MATCHINFO[j++];
    if (start != -1) {
      end = MATCHINFO[j];
      result[i] = %_SubString(STRING, start, end);
    }
    j++;
  }
  return result;
endmacro


// Executes |regexp| on |string| from |start| without validating arguments.
// On a match, updates lastIndex only for sticky regexps and returns the
// result built by the macro; on failure resets lastIndex to 0 and returns
// null.
function RegExpExecNoTests(regexp, string, start) {
  // Must be called with RegExp, string and positive integer as arguments.
  var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo);
  if (matchInfo !== null) {
    // ES6 21.2.5.2.2 step 18.
    if (REGEXP_STICKY(regexp)) regexp.lastIndex = matchInfo[CAPTURE1];
    RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
  }
  regexp.lastIndex = 0;
  return null;
}


// ES#sec-regexp.prototype.exec
// RegExp.prototype.exec ( string )
// Throws a TypeError for non-RegExp receivers. Returns the match result, or
// null (after resetting lastIndex to 0) when there is no match.
function RegExpSubclassExecJS(string) {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        'RegExp.prototype.exec', this);
  }

  string = TO_STRING(string);
  var lastIndex = this.lastIndex;

  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
  // algorithm, step 4) even if the value is discarded for non-global RegExps.
  var i = TO_LENGTH(lastIndex);

  var global = TO_BOOLEAN(REGEXP_GLOBAL(this));
  var sticky = TO_BOOLEAN(REGEXP_STICKY(this));
  var updateLastIndex = global || sticky;
  if (updateLastIndex) {
    if (i > string.length) {
      this.lastIndex = 0;
      return null;
    }
  } else {
    // Neither global nor sticky: always search from the start.
    i = 0;
  }

  // matchIndices is either null or the RegExpLastMatchInfo array.
  // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
  // itself, but ES2015 allows monkey-patching this property to differ from
  // the internal flags. If it differs, recompile a different RegExp?
  var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);

  if (IS_NULL(matchIndices)) {
    this.lastIndex = 0;
    return null;
  }

  // Successful match.
  if (updateLastIndex) {
    this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
  }
  RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
}
%FunctionRemovePrototype(RegExpSubclassExecJS);


// Legacy implementation of RegExp.prototype.exec
// Same shape as RegExpSubclassExecJS, but tests the raw flag macros directly
// and additionally checks i < 0 before executing.
function RegExpExecJS(string) {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        'RegExp.prototype.exec', this);
  }

  string = TO_STRING(string);
  var lastIndex = this.lastIndex;

  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
  // algorithm, step 4) even if the value is discarded for non-global RegExps.
  var i = TO_LENGTH(lastIndex);

  var updateLastIndex = REGEXP_GLOBAL(this) || REGEXP_STICKY(this);
  if (updateLastIndex) {
    if (i < 0 || i > string.length) {
      this.lastIndex = 0;
      return null;
    }
  } else {
    i = 0;
  }

  // matchIndices is either null or the RegExpLastMatchInfo array.
  var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);

  if (IS_NULL(matchIndices)) {
    this.lastIndex = 0;
    return null;
  }

  // Successful match.
  if (updateLastIndex) {
    this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
  }
  RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
}


// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
// Also takes an optional exec method in case our caller
// has already fetched exec.
// A callable exec must return a receiver or null, otherwise a TypeError is
// thrown. When exec is not callable, RegExpExecJS is invoked on |regexp|
// instead (which itself throws for non-RegExp receivers).
function RegExpSubclassExec(regexp, string, exec) {
  if (IS_UNDEFINED(exec)) {
    exec = regexp.exec;
  }
  if (IS_CALLABLE(exec)) {
    var result = %_Call(exec, regexp, string);
    if (!IS_RECEIVER(result) && !IS_NULL(result)) {
      throw MakeTypeError(kInvalidRegExpExecResult);
    }
    return result;
  }
  return %_Call(RegExpExecJS, regexp, string);
}
%SetForceInlineFlag(RegExpSubclassExec);


// One-element cache for the simplified test regexp.
286var regexp_key; 287var regexp_val; 288 289// Legacy implementation of RegExp.prototype.test 290// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 291// that test is defined in terms of String.prototype.exec. However, it probably 292// means the original value of String.prototype.exec, which is what everybody 293// else implements. 294function RegExpTest(string) { 295 if (!IS_REGEXP(this)) { 296 throw MakeTypeError(kIncompatibleMethodReceiver, 297 'RegExp.prototype.test', this); 298 } 299 string = TO_STRING(string); 300 301 var lastIndex = this.lastIndex; 302 303 // Conversion is required by the ES2015 specification (RegExpBuiltinExec 304 // algorithm, step 4) even if the value is discarded for non-global RegExps. 305 var i = TO_LENGTH(lastIndex); 306 307 if (REGEXP_GLOBAL(this) || REGEXP_STICKY(this)) { 308 if (i < 0 || i > string.length) { 309 this.lastIndex = 0; 310 return false; 311 } 312 // matchIndices is either null or the RegExpLastMatchInfo array. 313 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); 314 if (IS_NULL(matchIndices)) { 315 this.lastIndex = 0; 316 return false; 317 } 318 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; 319 return true; 320 } else { 321 // Non-global, non-sticky regexp. 322 // Remove irrelevant preceeding '.*' in a test regexp. The expression 323 // checks whether this.source starts with '.*' and that the third char is 324 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560 325 var regexp = this; 326 var source = REGEXP_SOURCE(regexp); 327 if (source.length >= 3 && 328 %_StringCharCodeAt(source, 0) == 46 && // '.' 329 %_StringCharCodeAt(source, 1) == 42 && // '*' 330 %_StringCharCodeAt(source, 2) != 63) { // '?' 331 regexp = TrimRegExp(regexp); 332 } 333 // matchIndices is either null or the RegExpLastMatchInfo array. 
334 var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo); 335 if (IS_NULL(matchIndices)) { 336 this.lastIndex = 0; 337 return false; 338 } 339 return true; 340 } 341} 342 343 344// ES#sec-regexp.prototype.test RegExp.prototype.test ( S ) 345function RegExpSubclassTest(string) { 346 if (!IS_RECEIVER(this)) { 347 throw MakeTypeError(kIncompatibleMethodReceiver, 348 'RegExp.prototype.test', this); 349 } 350 string = TO_STRING(string); 351 var match = RegExpSubclassExec(this, string); 352 return !IS_NULL(match); 353} 354%FunctionRemovePrototype(RegExpSubclassTest); 355 356function TrimRegExp(regexp) { 357 if (regexp_key !== regexp) { 358 regexp_key = regexp; 359 regexp_val = 360 new GlobalRegExp( 361 %_SubString(REGEXP_SOURCE(regexp), 2, REGEXP_SOURCE(regexp).length), 362 (REGEXP_IGNORE_CASE(regexp) ? REGEXP_MULTILINE(regexp) ? "im" : "i" 363 : REGEXP_MULTILINE(regexp) ? "m" : "")); 364 } 365 return regexp_val; 366} 367 368 369function RegExpToString() { 370 if (!IS_RECEIVER(this)) { 371 throw MakeTypeError( 372 kIncompatibleMethodReceiver, 'RegExp.prototype.toString', this); 373 } 374 if (this === GlobalRegExpPrototype) { 375 %IncrementUseCounter(kRegExpPrototypeToString); 376 } 377 return '/' + TO_STRING(this.source) + '/' + TO_STRING(this.flags); 378} 379 380 381function AtSurrogatePair(subject, index) { 382 if (index + 1 >= subject.length) return false; 383 var first = %_StringCharCodeAt(subject, index); 384 if (first < 0xD800 || first > 0xDBFF) return false; 385 var second = %_StringCharCodeAt(subject, index + 1); 386 return second >= 0xDC00 || second <= 0xDFFF; 387} 388 389 390// Legacy implementation of RegExp.prototype[Symbol.split] which 391// doesn't properly call the underlying exec, @@species methods 392function RegExpSplit(string, limit) { 393 // TODO(yangguo): allow non-regexp receivers. 
394 if (!IS_REGEXP(this)) { 395 throw MakeTypeError(kIncompatibleMethodReceiver, 396 "RegExp.prototype.@@split", this); 397 } 398 var separator = this; 399 var subject = TO_STRING(string); 400 401 limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit); 402 var length = subject.length; 403 404 if (limit === 0) return []; 405 406 if (length === 0) { 407 if (DoRegExpExec(separator, subject, 0, 0) !== null) return []; 408 return [subject]; 409 } 410 411 var currentIndex = 0; 412 var startIndex = 0; 413 var startMatch = 0; 414 var result = new InternalArray(); 415 416 outer_loop: 417 while (true) { 418 if (startIndex === length) { 419 result[result.length] = %_SubString(subject, currentIndex, length); 420 break; 421 } 422 423 var matchInfo = DoRegExpExec(separator, subject, startIndex); 424 if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) { 425 result[result.length] = %_SubString(subject, currentIndex, length); 426 break; 427 } 428 var endIndex = matchInfo[CAPTURE1]; 429 430 // We ignore a zero-length match at the currentIndex. 
431 if (startIndex === endIndex && endIndex === currentIndex) { 432 if (REGEXP_UNICODE(this) && AtSurrogatePair(subject, startIndex)) { 433 startIndex += 2; 434 } else { 435 startIndex++; 436 } 437 continue; 438 } 439 440 result[result.length] = %_SubString(subject, currentIndex, startMatch); 441 442 if (result.length === limit) break; 443 444 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE; 445 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) { 446 var start = matchInfo[i++]; 447 var end = matchInfo[i++]; 448 if (end != -1) { 449 result[result.length] = %_SubString(subject, start, end); 450 } else { 451 result[result.length] = UNDEFINED; 452 } 453 if (result.length === limit) break outer_loop; 454 } 455 456 startIndex = currentIndex = endIndex; 457 } 458 459 var array_result = []; 460 %MoveArrayContents(result, array_result); 461 return array_result; 462} 463 464 465// ES#sec-regexp.prototype-@@split 466// RegExp.prototype [ @@split ] ( string, limit ) 467function RegExpSubclassSplit(string, limit) { 468 if (!IS_RECEIVER(this)) { 469 throw MakeTypeError(kIncompatibleMethodReceiver, 470 "RegExp.prototype.@@split", this); 471 } 472 string = TO_STRING(string); 473 var constructor = SpeciesConstructor(this, GlobalRegExp); 474 var flags = TO_STRING(this.flags); 475 476 // TODO(adamk): this fast path is wrong with respect to this.global 477 // and this.sticky, but hopefully the spec will remove those gets 478 // and thus make the assumption of 'exec' having no side-effects 479 // more correct. Also, we doesn't ensure that 'exec' is actually 480 // a data property on RegExp.prototype. 481 var exec; 482 if (IS_REGEXP(this) && constructor === GlobalRegExp) { 483 exec = this.exec; 484 if (exec === RegExpSubclassExecJS) { 485 return %_Call(RegExpSplit, this, string, limit); 486 } 487 } 488 489 var unicode = %StringIndexOf(flags, 'u', 0) >= 0; 490 var sticky = %StringIndexOf(flags, 'y', 0) >= 0; 491 var newFlags = sticky ? 
flags : flags + "y"; 492 var splitter = new constructor(this, newFlags); 493 var array = new GlobalArray(); 494 var arrayIndex = 0; 495 var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit); 496 var size = string.length; 497 var prevStringIndex = 0; 498 if (lim === 0) return array; 499 var result; 500 if (size === 0) { 501 result = RegExpSubclassExec(splitter, string); 502 if (IS_NULL(result)) %AddElement(array, 0, string); 503 return array; 504 } 505 var stringIndex = prevStringIndex; 506 while (stringIndex < size) { 507 splitter.lastIndex = stringIndex; 508 result = RegExpSubclassExec(splitter, string, exec); 509 // Ensure exec will be read again on the next loop through. 510 exec = UNDEFINED; 511 if (IS_NULL(result)) { 512 stringIndex += AdvanceStringIndex(string, stringIndex, unicode); 513 } else { 514 var end = MinSimple(TO_LENGTH(splitter.lastIndex), size); 515 if (end === prevStringIndex) { 516 stringIndex += AdvanceStringIndex(string, stringIndex, unicode); 517 } else { 518 %AddElement( 519 array, arrayIndex, 520 %_SubString(string, prevStringIndex, stringIndex)); 521 arrayIndex++; 522 if (arrayIndex === lim) return array; 523 prevStringIndex = end; 524 var numberOfCaptures = MaxSimple(TO_LENGTH(result.length), 0); 525 for (var i = 1; i < numberOfCaptures; i++) { 526 %AddElement(array, arrayIndex, result[i]); 527 arrayIndex++; 528 if (arrayIndex === lim) return array; 529 } 530 stringIndex = prevStringIndex; 531 } 532 } 533 } 534 %AddElement(array, arrayIndex, 535 %_SubString(string, prevStringIndex, size)); 536 return array; 537} 538%FunctionRemovePrototype(RegExpSubclassSplit); 539 540 541// ES#sec-regexp.prototype-@@match 542// RegExp.prototype [ @@match ] ( string ) 543function RegExpSubclassMatch(string) { 544 if (!IS_RECEIVER(this)) { 545 throw MakeTypeError(kIncompatibleMethodReceiver, 546 "RegExp.prototype.@@match", this); 547 } 548 string = TO_STRING(string); 549 var global = this.global; 550 if (!global) return RegExpSubclassExec(this, 
string); 551 var unicode = this.unicode; 552 this.lastIndex = 0; 553 var array = new InternalArray(); 554 var n = 0; 555 var result; 556 while (true) { 557 result = RegExpSubclassExec(this, string); 558 if (IS_NULL(result)) { 559 if (n === 0) return null; 560 break; 561 } 562 var matchStr = TO_STRING(result[0]); 563 array[n] = matchStr; 564 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode); 565 n++; 566 } 567 var resultArray = []; 568 %MoveArrayContents(array, resultArray); 569 return resultArray; 570} 571%FunctionRemovePrototype(RegExpSubclassMatch); 572 573 574// Legacy implementation of RegExp.prototype[Symbol.replace] which 575// doesn't properly call the underlying exec method. 576 577// TODO(lrn): This array will survive indefinitely if replace is never 578// called again. However, it will be empty, since the contents are cleared 579// in the finally block. 580var reusableReplaceArray = new InternalArray(4); 581 582// Helper function for replacing regular expressions with the result of a 583// function application in String.prototype.replace. 584function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) { 585 var resultArray = reusableReplaceArray; 586 if (resultArray) { 587 reusableReplaceArray = null; 588 } else { 589 // Inside a nested replace (replace called from the replacement function 590 // of another replace) or we have failed to set the reusable array 591 // back due to an exception in a replacement function. Create a new 592 // array to use in the future, or until the original is written back. 593 resultArray = new InternalArray(16); 594 } 595 var res = %RegExpExecMultiple(regexp, 596 subject, 597 RegExpLastMatchInfo, 598 resultArray); 599 regexp.lastIndex = 0; 600 if (IS_NULL(res)) { 601 // No matches at all. 
602 reusableReplaceArray = resultArray; 603 return subject; 604 } 605 var len = res.length; 606 if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) { 607 // If the number of captures is two then there are no explicit captures in 608 // the regexp, just the implicit capture that captures the whole match. In 609 // this case we can simplify quite a bit and end up with something faster. 610 // The builder will consist of some integers that indicate slices of the 611 // input string and some replacements that were returned from the replace 612 // function. 613 var match_start = 0; 614 for (var i = 0; i < len; i++) { 615 var elem = res[i]; 616 if (%_IsSmi(elem)) { 617 // Integers represent slices of the original string. 618 if (elem > 0) { 619 match_start = (elem >> 11) + (elem & 0x7ff); 620 } else { 621 match_start = res[++i] - elem; 622 } 623 } else { 624 var func_result = replace(elem, match_start, subject); 625 // Overwrite the i'th element in the results with the string we got 626 // back from the callback function. 627 res[i] = TO_STRING(func_result); 628 match_start += elem.length; 629 } 630 } 631 } else { 632 for (var i = 0; i < len; i++) { 633 var elem = res[i]; 634 if (!%_IsSmi(elem)) { 635 // elem must be an Array. 636 // Use the apply argument as backing for global RegExp properties. 637 var func_result = %reflect_apply(replace, UNDEFINED, elem); 638 // Overwrite the i'th element in the results with the string we got 639 // back from the callback function. 640 res[i] = TO_STRING(func_result); 641 } 642 } 643 } 644 var result = %StringBuilderConcat(res, len, subject); 645 resultArray.length = 0; 646 reusableReplaceArray = resultArray; 647 return result; 648} 649 650 651// Compute the string of a given regular expression capture. 652function CaptureString(string, lastCaptureInfo, index) { 653 // Scale the index. 654 var scaled = index << 1; 655 // Compute start and end. 
656 var start = lastCaptureInfo[CAPTURE(scaled)]; 657 // If start isn't valid, return undefined. 658 if (start < 0) return; 659 var end = lastCaptureInfo[CAPTURE(scaled + 1)]; 660 return %_SubString(string, start, end); 661} 662 663 664function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) { 665 var matchInfo = DoRegExpExec(regexp, subject, 0); 666 if (IS_NULL(matchInfo)) { 667 regexp.lastIndex = 0; 668 return subject; 669 } 670 var index = matchInfo[CAPTURE0]; 671 var result = %_SubString(subject, 0, index); 672 var endOfMatch = matchInfo[CAPTURE1]; 673 // Compute the parameter list consisting of the match, captures, index, 674 // and subject for the replace function invocation. 675 // The number of captures plus one for the match. 676 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; 677 var replacement; 678 if (m == 1) { 679 // No captures, only the match, which is always valid. 680 var s = %_SubString(subject, index, endOfMatch); 681 // Don't call directly to avoid exposing the built-in global object. 682 replacement = replace(s, index, subject); 683 } else { 684 var parameters = new InternalArray(m + 2); 685 for (var j = 0; j < m; j++) { 686 parameters[j] = CaptureString(subject, matchInfo, j); 687 } 688 parameters[j] = index; 689 parameters[j + 1] = subject; 690 691 replacement = %reflect_apply(replace, UNDEFINED, parameters); 692 } 693 694 result += replacement; // The add method converts to string if necessary. 695 // Can't use matchInfo any more from here, since the function could 696 // overwrite it. 
697 return result + %_SubString(subject, endOfMatch, subject.length); 698} 699 700 701function RegExpReplace(string, replace) { 702 if (!IS_REGEXP(this)) { 703 throw MakeTypeError(kIncompatibleMethodReceiver, 704 "RegExp.prototype.@@replace", this); 705 } 706 var subject = TO_STRING(string); 707 var search = this; 708 709 if (!IS_CALLABLE(replace)) { 710 replace = TO_STRING(replace); 711 712 if (!REGEXP_GLOBAL(search)) { 713 // Non-global regexp search, string replace. 714 var match = DoRegExpExec(search, subject, 0); 715 if (match == null) { 716 search.lastIndex = 0 717 return subject; 718 } 719 if (replace.length == 0) { 720 return %_SubString(subject, 0, match[CAPTURE0]) + 721 %_SubString(subject, match[CAPTURE1], subject.length) 722 } 723 return ExpandReplacement(replace, subject, RegExpLastMatchInfo, 724 %_SubString(subject, 0, match[CAPTURE0])) + 725 %_SubString(subject, match[CAPTURE1], subject.length); 726 } 727 728 // Global regexp search, string replace. 729 search.lastIndex = 0; 730 return %StringReplaceGlobalRegExpWithString( 731 subject, search, replace, RegExpLastMatchInfo); 732 } 733 734 if (REGEXP_GLOBAL(search)) { 735 // Global regexp search, function replace. 736 return StringReplaceGlobalRegExpWithFunction(subject, search, replace); 737 } 738 // Non-global regexp search, function replace. 739 return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace); 740} 741 742 743// ES#sec-getsubstitution 744// GetSubstitution(matched, str, position, captures, replacement) 745// Expand the $-expressions in the string and return a new string with 746// the result. 
747// TODO(littledan): Call this function from String.prototype.replace instead 748// of the very similar ExpandReplacement in src/js/string.js 749function GetSubstitution(matched, string, position, captures, replacement) { 750 var matchLength = matched.length; 751 var stringLength = string.length; 752 var capturesLength = captures.length; 753 var tailPos = position + matchLength; 754 var result = ""; 755 var pos, expansion, peek, next, scaledIndex, advance, newScaledIndex; 756 757 var next = %StringIndexOf(replacement, '$', 0); 758 if (next < 0) { 759 result += replacement; 760 return result; 761 } 762 763 if (next > 0) result += %_SubString(replacement, 0, next); 764 765 while (true) { 766 expansion = '$'; 767 pos = next + 1; 768 if (pos < replacement.length) { 769 peek = %_StringCharCodeAt(replacement, pos); 770 if (peek == 36) { // $$ 771 ++pos; 772 result += '$'; 773 } else if (peek == 38) { // $& - match 774 ++pos; 775 result += matched; 776 } else if (peek == 96) { // $` - prefix 777 ++pos; 778 result += %_SubString(string, 0, position); 779 } else if (peek == 39) { // $' - suffix 780 ++pos; 781 result += %_SubString(string, tailPos, stringLength); 782 } else if (peek >= 48 && peek <= 57) { 783 // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99 784 scaledIndex = (peek - 48); 785 advance = 1; 786 if (pos + 1 < replacement.length) { 787 next = %_StringCharCodeAt(replacement, pos + 1); 788 if (next >= 48 && next <= 57) { 789 newScaledIndex = scaledIndex * 10 + ((next - 48)); 790 if (newScaledIndex < capturesLength) { 791 scaledIndex = newScaledIndex; 792 advance = 2; 793 } 794 } 795 } 796 if (scaledIndex != 0 && scaledIndex < capturesLength) { 797 var capture = captures[scaledIndex]; 798 if (!IS_UNDEFINED(capture)) result += capture; 799 pos += advance; 800 } else { 801 result += '$'; 802 } 803 } else { 804 result += '$'; 805 } 806 } else { 807 result += '$'; 808 } 809 810 // Go the the next $ in the replacement. 
811 next = %StringIndexOf(replacement, '$', pos); 812 813 // Return if there are no more $ characters in the replacement. If we 814 // haven't reached the end, we need to append the suffix. 815 if (next < 0) { 816 if (pos < replacement.length) { 817 result += %_SubString(replacement, pos, replacement.length); 818 } 819 return result; 820 } 821 822 // Append substring between the previous and the next $ character. 823 if (next > pos) { 824 result += %_SubString(replacement, pos, next); 825 } 826 } 827 return result; 828} 829 830 831// ES#sec-advancestringindex 832// AdvanceStringIndex ( S, index, unicode ) 833function AdvanceStringIndex(string, index, unicode) { 834 var increment = 1; 835 if (unicode) { 836 var first = %_StringCharCodeAt(string, index); 837 if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) { 838 var second = %_StringCharCodeAt(string, index + 1); 839 if (second >= 0xDC00 && second <= 0xDFFF) { 840 increment = 2; 841 } 842 } 843 } 844 return increment; 845} 846 847 848function SetAdvancedStringIndex(regexp, string, unicode) { 849 var lastIndex = regexp.lastIndex; 850 regexp.lastIndex = lastIndex + 851 AdvanceStringIndex(string, lastIndex, unicode); 852} 853 854 855// ES#sec-regexp.prototype-@@replace 856// RegExp.prototype [ @@replace ] ( string, replaceValue ) 857function RegExpSubclassReplace(string, replace) { 858 if (!IS_RECEIVER(this)) { 859 throw MakeTypeError(kIncompatibleMethodReceiver, 860 "RegExp.prototype.@@replace", this); 861 } 862 string = TO_STRING(string); 863 var length = string.length; 864 var functionalReplace = IS_CALLABLE(replace); 865 if (!functionalReplace) replace = TO_STRING(replace); 866 var global = TO_BOOLEAN(this.global); 867 if (global) { 868 var unicode = TO_BOOLEAN(this.unicode); 869 this.lastIndex = 0; 870 } 871 872 // TODO(adamk): this fast path is wrong with respect to this.global 873 // and this.sticky, but hopefully the spec will remove those gets 874 // and thus make the assumption of 'exec' 
having no side-effects 875 // more correct. Also, we doesn't ensure that 'exec' is actually 876 // a data property on RegExp.prototype, nor does the fast path 877 // correctly handle lastIndex setting. 878 var exec; 879 if (IS_REGEXP(this)) { 880 exec = this.exec; 881 if (exec === RegExpSubclassExecJS) { 882 return %_Call(RegExpReplace, this, string, replace); 883 } 884 } 885 886 var results = new InternalArray(); 887 var result, replacement; 888 while (true) { 889 result = RegExpSubclassExec(this, string, exec); 890 // Ensure exec will be read again on the next loop through. 891 exec = UNDEFINED; 892 if (IS_NULL(result)) { 893 break; 894 } else { 895 results.push(result); 896 if (!global) break; 897 var matchStr = TO_STRING(result[0]); 898 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode); 899 } 900 } 901 var accumulatedResult = ""; 902 var nextSourcePosition = 0; 903 for (var i = 0; i < results.length; i++) { 904 result = results[i]; 905 var capturesLength = MaxSimple(TO_LENGTH(result.length), 0); 906 var matched = TO_STRING(result[0]); 907 var matchedLength = matched.length; 908 var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0); 909 var captures = new InternalArray(); 910 for (var n = 0; n < capturesLength; n++) { 911 var capture = result[n]; 912 if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture); 913 captures[n] = capture; 914 } 915 if (functionalReplace) { 916 var parameters = new InternalArray(capturesLength + 2); 917 for (var j = 0; j < capturesLength; j++) { 918 parameters[j] = captures[j]; 919 } 920 parameters[j] = position; 921 parameters[j + 1] = string; 922 replacement = %reflect_apply(replace, UNDEFINED, parameters, 0, 923 parameters.length); 924 } else { 925 replacement = GetSubstitution(matched, string, position, captures, 926 replace); 927 } 928 if (position >= nextSourcePosition) { 929 accumulatedResult += 930 %_SubString(string, nextSourcePosition, position) + replacement; 931 nextSourcePosition = 
position + matchedLength; 932 } 933 } 934 if (nextSourcePosition >= length) return accumulatedResult; 935 return accumulatedResult + %_SubString(string, nextSourcePosition, length); 936} 937%FunctionRemovePrototype(RegExpSubclassReplace); 938 939 940// ES#sec-regexp.prototype-@@search 941// RegExp.prototype [ @@search ] ( string ) 942function RegExpSubclassSearch(string) { 943 if (!IS_RECEIVER(this)) { 944 throw MakeTypeError(kIncompatibleMethodReceiver, 945 "RegExp.prototype.@@search", this); 946 } 947 string = TO_STRING(string); 948 var previousLastIndex = this.lastIndex; 949 this.lastIndex = 0; 950 var result = RegExpSubclassExec(this, string); 951 this.lastIndex = previousLastIndex; 952 if (IS_NULL(result)) return -1; 953 return result.index; 954} 955%FunctionRemovePrototype(RegExpSubclassSearch); 956 957 958// Getters for the static properties lastMatch, lastParen, leftContext, and 959// rightContext of the RegExp constructor. The properties are computed based 960// on the captures array of the last successful match and the subject string 961// of the last successful match. 962function RegExpGetLastMatch() { 963 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); 964 return %_SubString(regExpSubject, 965 RegExpLastMatchInfo[CAPTURE0], 966 RegExpLastMatchInfo[CAPTURE1]); 967} 968 969 970function RegExpGetLastParen() { 971 var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo); 972 if (length <= 2) return ''; // There were no captures. 973 // We match the SpiderMonkey behavior: return the substring defined by the 974 // last pair (after the first pair) of elements of the capture array even if 975 // it is empty. 
976 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); 977 var start = RegExpLastMatchInfo[CAPTURE(length - 2)]; 978 var end = RegExpLastMatchInfo[CAPTURE(length - 1)]; 979 if (start != -1 && end != -1) { 980 return %_SubString(regExpSubject, start, end); 981 } 982 return ""; 983} 984 985 986function RegExpGetLeftContext() { 987 var start_index; 988 var subject; 989 start_index = RegExpLastMatchInfo[CAPTURE0]; 990 subject = LAST_SUBJECT(RegExpLastMatchInfo); 991 return %_SubString(subject, 0, start_index); 992} 993 994 995function RegExpGetRightContext() { 996 var start_index; 997 var subject; 998 start_index = RegExpLastMatchInfo[CAPTURE1]; 999 subject = LAST_SUBJECT(RegExpLastMatchInfo); 1000 return %_SubString(subject, start_index, subject.length); 1001} 1002 1003 1004// The properties $1..$9 are the first nine capturing substrings of the last 1005// successful match, or ''. The function RegExpMakeCaptureGetter will be 1006// called with indices from 1 to 9. 1007function RegExpMakeCaptureGetter(n) { 1008 return function foo() { 1009 var index = n * 2; 1010 if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return ''; 1011 var matchStart = RegExpLastMatchInfo[CAPTURE(index)]; 1012 var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)]; 1013 if (matchStart == -1 || matchEnd == -1) return ''; 1014 return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd); 1015 }; 1016} 1017 1018 1019// ES6 21.2.5.3. 1020function RegExpGetFlags() { 1021 if (!IS_RECEIVER(this)) { 1022 throw MakeTypeError( 1023 kRegExpNonObject, "RegExp.prototype.flags", TO_STRING(this)); 1024 } 1025 var result = ''; 1026 if (this.global) result += 'g'; 1027 if (this.ignoreCase) result += 'i'; 1028 if (this.multiline) result += 'm'; 1029 if (this.unicode) result += 'u'; 1030 if (this.sticky) result += 'y'; 1031 return result; 1032} 1033 1034 1035// ES6 21.2.5.4. 
1036function RegExpGetGlobal() { 1037 if (!IS_REGEXP(this)) { 1038 // TODO(littledan): Remove this RegExp compat workaround 1039 if (this === GlobalRegExpPrototype) { 1040 %IncrementUseCounter(kRegExpPrototypeOldFlagGetter); 1041 return UNDEFINED; 1042 } 1043 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.global"); 1044 } 1045 return TO_BOOLEAN(REGEXP_GLOBAL(this)); 1046} 1047%SetForceInlineFlag(RegExpGetGlobal); 1048 1049 1050// ES6 21.2.5.5. 1051function RegExpGetIgnoreCase() { 1052 if (!IS_REGEXP(this)) { 1053 // TODO(littledan): Remove this RegExp compat workaround 1054 if (this === GlobalRegExpPrototype) { 1055 %IncrementUseCounter(kRegExpPrototypeOldFlagGetter); 1056 return UNDEFINED; 1057 } 1058 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.ignoreCase"); 1059 } 1060 return TO_BOOLEAN(REGEXP_IGNORE_CASE(this)); 1061} 1062 1063 1064// ES6 21.2.5.7. 1065function RegExpGetMultiline() { 1066 if (!IS_REGEXP(this)) { 1067 // TODO(littledan): Remove this RegExp compat workaround 1068 if (this === GlobalRegExpPrototype) { 1069 %IncrementUseCounter(kRegExpPrototypeOldFlagGetter); 1070 return UNDEFINED; 1071 } 1072 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.multiline"); 1073 } 1074 return TO_BOOLEAN(REGEXP_MULTILINE(this)); 1075} 1076 1077 1078// ES6 21.2.5.10. 1079function RegExpGetSource() { 1080 if (!IS_REGEXP(this)) { 1081 // TODO(littledan): Remove this RegExp compat workaround 1082 if (this === GlobalRegExpPrototype) { 1083 %IncrementUseCounter(kRegExpPrototypeSourceGetter); 1084 return "(?:)"; 1085 } 1086 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.source"); 1087 } 1088 return REGEXP_SOURCE(this); 1089} 1090 1091 1092// ES6 21.2.5.12. 
1093function RegExpGetSticky() { 1094 if (!IS_REGEXP(this)) { 1095 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it 1096 // TODO(littledan): Remove this workaround or standardize it 1097 if (this === GlobalRegExpPrototype) { 1098 %IncrementUseCounter(kRegExpPrototypeStickyGetter); 1099 return UNDEFINED; 1100 } 1101 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.sticky"); 1102 } 1103 return TO_BOOLEAN(REGEXP_STICKY(this)); 1104} 1105%SetForceInlineFlag(RegExpGetSticky); 1106 1107 1108// ES6 21.2.5.15. 1109function RegExpGetUnicode() { 1110 if (!IS_REGEXP(this)) { 1111 // TODO(littledan): Remove this RegExp compat workaround 1112 if (this === GlobalRegExpPrototype) { 1113 %IncrementUseCounter(kRegExpPrototypeUnicodeGetter); 1114 return UNDEFINED; 1115 } 1116 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.unicode"); 1117 } 1118 return TO_BOOLEAN(REGEXP_UNICODE(this)); 1119} 1120%SetForceInlineFlag(RegExpGetUnicode); 1121 1122 1123function RegExpSpecies() { 1124 return this; 1125} 1126 1127 1128// ------------------------------------------------------------------- 1129 1130%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp'); 1131GlobalRegExpPrototype = new GlobalObject(); 1132%FunctionSetPrototype(GlobalRegExp, GlobalRegExpPrototype); 1133%AddNamedProperty( 1134 GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM); 1135%SetCode(GlobalRegExp, RegExpConstructor); 1136 1137utils.InstallGetter(GlobalRegExp, speciesSymbol, RegExpSpecies); 1138 1139utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ 1140 "exec", RegExpSubclassExecJS, 1141 "test", RegExpSubclassTest, 1142 "toString", RegExpToString, 1143 "compile", RegExpCompileJS, 1144 matchSymbol, RegExpSubclassMatch, 1145 replaceSymbol, RegExpSubclassReplace, 1146 searchSymbol, RegExpSubclassSearch, 1147 splitSymbol, RegExpSubclassSplit, 1148]); 1149 1150utils.InstallGetter(GlobalRegExp.prototype, 'flags', RegExpGetFlags); 
1151utils.InstallGetter(GlobalRegExp.prototype, 'global', RegExpGetGlobal); 1152utils.InstallGetter(GlobalRegExp.prototype, 'ignoreCase', RegExpGetIgnoreCase); 1153utils.InstallGetter(GlobalRegExp.prototype, 'multiline', RegExpGetMultiline); 1154utils.InstallGetter(GlobalRegExp.prototype, 'source', RegExpGetSource); 1155utils.InstallGetter(GlobalRegExp.prototype, 'sticky', RegExpGetSticky); 1156utils.InstallGetter(GlobalRegExp.prototype, 'unicode', RegExpGetUnicode); 1157 1158// The properties `input` and `$_` are aliases for each other. When this 1159// value is set the value it is set to is coerced to a string. 1160// Getter and setter for the input. 1161var RegExpGetInput = function() { 1162 var regExpInput = LAST_INPUT(RegExpLastMatchInfo); 1163 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 1164}; 1165var RegExpSetInput = function(string) { 1166 LAST_INPUT(RegExpLastMatchInfo) = TO_STRING(string); 1167}; 1168 1169%OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22); 1170utils.InstallGetterSetter(GlobalRegExp, 'input', RegExpGetInput, RegExpSetInput, 1171 DONT_DELETE); 1172utils.InstallGetterSetter(GlobalRegExp, '$_', RegExpGetInput, RegExpSetInput, 1173 DONT_ENUM | DONT_DELETE); 1174 1175 1176var NoOpSetter = function(ignored) {}; 1177 1178 1179// Static properties set by a successful match. 
1180utils.InstallGetterSetter(GlobalRegExp, 'lastMatch', RegExpGetLastMatch, 1181 NoOpSetter, DONT_DELETE); 1182utils.InstallGetterSetter(GlobalRegExp, '$&', RegExpGetLastMatch, NoOpSetter, 1183 DONT_ENUM | DONT_DELETE); 1184utils.InstallGetterSetter(GlobalRegExp, 'lastParen', RegExpGetLastParen, 1185 NoOpSetter, DONT_DELETE); 1186utils.InstallGetterSetter(GlobalRegExp, '$+', RegExpGetLastParen, NoOpSetter, 1187 DONT_ENUM | DONT_DELETE); 1188utils.InstallGetterSetter(GlobalRegExp, 'leftContext', RegExpGetLeftContext, 1189 NoOpSetter, DONT_DELETE); 1190utils.InstallGetterSetter(GlobalRegExp, '$`', RegExpGetLeftContext, NoOpSetter, 1191 DONT_ENUM | DONT_DELETE); 1192utils.InstallGetterSetter(GlobalRegExp, 'rightContext', RegExpGetRightContext, 1193 NoOpSetter, DONT_DELETE); 1194utils.InstallGetterSetter(GlobalRegExp, "$'", RegExpGetRightContext, NoOpSetter, 1195 DONT_ENUM | DONT_DELETE); 1196 1197for (var i = 1; i < 10; ++i) { 1198 utils.InstallGetterSetter(GlobalRegExp, '$' + i, RegExpMakeCaptureGetter(i), 1199 NoOpSetter, DONT_DELETE); 1200} 1201%ToFastProperties(GlobalRegExp); 1202 1203// ------------------------------------------------------------------- 1204// Internal 1205 1206var InternalRegExpMatchInfo = new InternalPackedArray(2, "", UNDEFINED, 0, 0); 1207 1208function InternalRegExpMatch(regexp, subject) { 1209 var matchInfo = %_RegExpExec(regexp, subject, 0, InternalRegExpMatchInfo); 1210 if (!IS_NULL(matchInfo)) { 1211 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, subject); 1212 } 1213 return null; 1214} 1215 1216function InternalRegExpReplace(regexp, subject, replacement) { 1217 return %StringReplaceGlobalRegExpWithString( 1218 subject, regexp, replacement, InternalRegExpMatchInfo); 1219} 1220 1221// ------------------------------------------------------------------- 1222// Exports 1223 1224utils.Export(function(to) { 1225 to.InternalRegExpMatch = InternalRegExpMatch; 1226 to.InternalRegExpReplace = InternalRegExpReplace; 1227 to.IsRegExp = IsRegExp; 1228 
to.RegExpExec = DoRegExpExec; 1229 to.RegExpInitialize = RegExpInitialize; 1230 to.RegExpLastMatchInfo = RegExpLastMatchInfo; 1231 to.RegExpTest = RegExpTest; 1232}); 1233 1234}) 1235