• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/runtime/runtime-utils.h"
6 
7 #include "src/arguments.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h"
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder.h"
15 #include "src/string-search.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 class CompiledReplacement {
21  public:
CompiledReplacement(Zone * zone)22   explicit CompiledReplacement(Zone* zone)
23       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
24 
25   // Return whether the replacement is simple.
26   bool Compile(Handle<String> replacement, int capture_count,
27                int subject_length);
28 
29   // Use Apply only if Compile returned false.
30   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
31              int32_t* match);
32 
33   // Number of distinct parts of the replacement pattern.
parts()34   int parts() { return parts_.length(); }
35 
zone() const36   Zone* zone() const { return zone_; }
37 
38  private:
39   enum PartType {
40     SUBJECT_PREFIX = 1,
41     SUBJECT_SUFFIX,
42     SUBJECT_CAPTURE,
43     REPLACEMENT_SUBSTRING,
44     REPLACEMENT_STRING,
45     NUMBER_OF_PART_TYPES
46   };
47 
48   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart49     static inline ReplacementPart SubjectMatch() {
50       return ReplacementPart(SUBJECT_CAPTURE, 0);
51     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart52     static inline ReplacementPart SubjectCapture(int capture_index) {
53       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
54     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart55     static inline ReplacementPart SubjectPrefix() {
56       return ReplacementPart(SUBJECT_PREFIX, 0);
57     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart58     static inline ReplacementPart SubjectSuffix(int subject_length) {
59       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
60     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart61     static inline ReplacementPart ReplacementString() {
62       return ReplacementPart(REPLACEMENT_STRING, 0);
63     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart64     static inline ReplacementPart ReplacementSubString(int from, int to) {
65       DCHECK(from >= 0);
66       DCHECK(to > from);
67       return ReplacementPart(-from, to);
68     }
69 
70     // If tag <= 0 then it is the negation of a start index of a substring of
71     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart72     ReplacementPart(int tag, int data) : tag(tag), data(data) {
73       // Must be non-positive or a PartType value.
74       DCHECK(tag < NUMBER_OF_PART_TYPES);
75     }
76     // Either a value of PartType or a non-positive number that is
77     // the negation of an index into the replacement string.
78     int tag;
79     // The data value's interpretation depends on the value of tag:
80     // tag == SUBJECT_PREFIX ||
81     // tag == SUBJECT_SUFFIX:  data is unused.
82     // tag == SUBJECT_CAPTURE: data is the number of the capture.
83     // tag == REPLACEMENT_SUBSTRING ||
84     // tag == REPLACEMENT_STRING:    data is index into array of substrings
85     //                               of the replacement string.
86     // tag <= 0: Temporary representation of the substring of the replacement
87     //           string ranging over -tag .. data.
88     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
89     //           substring objects.
90     int data;
91   };
92 
93   template <typename Char>
ParseReplacementPattern(ZoneList<ReplacementPart> * parts,Vector<Char> characters,int capture_count,int subject_length,Zone * zone)94   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
95                                Vector<Char> characters, int capture_count,
96                                int subject_length, Zone* zone) {
97     int length = characters.length();
98     int last = 0;
99     for (int i = 0; i < length; i++) {
100       Char c = characters[i];
101       if (c == '$') {
102         int next_index = i + 1;
103         if (next_index == length) {  // No next character!
104           break;
105         }
106         Char c2 = characters[next_index];
107         switch (c2) {
108           case '$':
109             if (i > last) {
110               // There is a substring before. Include the first "$".
111               parts->Add(
112                   ReplacementPart::ReplacementSubString(last, next_index),
113                   zone);
114               last = next_index + 1;  // Continue after the second "$".
115             } else {
116               // Let the next substring start with the second "$".
117               last = next_index;
118             }
119             i = next_index;
120             break;
121           case '`':
122             if (i > last) {
123               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
124             }
125             parts->Add(ReplacementPart::SubjectPrefix(), zone);
126             i = next_index;
127             last = i + 1;
128             break;
129           case '\'':
130             if (i > last) {
131               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
132             }
133             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
134             i = next_index;
135             last = i + 1;
136             break;
137           case '&':
138             if (i > last) {
139               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
140             }
141             parts->Add(ReplacementPart::SubjectMatch(), zone);
142             i = next_index;
143             last = i + 1;
144             break;
145           case '0':
146           case '1':
147           case '2':
148           case '3':
149           case '4':
150           case '5':
151           case '6':
152           case '7':
153           case '8':
154           case '9': {
155             int capture_ref = c2 - '0';
156             if (capture_ref > capture_count) {
157               i = next_index;
158               continue;
159             }
160             int second_digit_index = next_index + 1;
161             if (second_digit_index < length) {
162               // Peek ahead to see if we have two digits.
163               Char c3 = characters[second_digit_index];
164               if ('0' <= c3 && c3 <= '9') {  // Double digits.
165                 int double_digit_ref = capture_ref * 10 + c3 - '0';
166                 if (double_digit_ref <= capture_count) {
167                   next_index = second_digit_index;
168                   capture_ref = double_digit_ref;
169                 }
170               }
171             }
172             if (capture_ref > 0) {
173               if (i > last) {
174                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
175                            zone);
176               }
177               DCHECK(capture_ref <= capture_count);
178               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
179               last = next_index + 1;
180             }
181             i = next_index;
182             break;
183           }
184           default:
185             i = next_index;
186             break;
187         }
188       }
189     }
190     if (length > last) {
191       if (last == 0) {
192         // Replacement is simple.  Do not use Apply to do the replacement.
193         return true;
194       } else {
195         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
196       }
197     }
198     return false;
199   }
200 
201   ZoneList<ReplacementPart> parts_;
202   ZoneList<Handle<String> > replacement_substrings_;
203   Zone* zone_;
204 };
205 
206 
Compile(Handle<String> replacement,int capture_count,int subject_length)207 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
208                                   int subject_length) {
209   {
210     DisallowHeapAllocation no_gc;
211     String::FlatContent content = replacement->GetFlatContent();
212     DCHECK(content.IsFlat());
213     bool simple = false;
214     if (content.IsOneByte()) {
215       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
216                                        capture_count, subject_length, zone());
217     } else {
218       DCHECK(content.IsTwoByte());
219       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
220                                        capture_count, subject_length, zone());
221     }
222     if (simple) return true;
223   }
224 
225   Isolate* isolate = replacement->GetIsolate();
226   // Find substrings of replacement string and create them as String objects.
227   int substring_index = 0;
228   for (int i = 0, n = parts_.length(); i < n; i++) {
229     int tag = parts_[i].tag;
230     if (tag <= 0) {  // A replacement string slice.
231       int from = -tag;
232       int to = parts_[i].data;
233       replacement_substrings_.Add(
234           isolate->factory()->NewSubString(replacement, from, to), zone());
235       parts_[i].tag = REPLACEMENT_SUBSTRING;
236       parts_[i].data = substring_index;
237       substring_index++;
238     } else if (tag == REPLACEMENT_STRING) {
239       replacement_substrings_.Add(replacement, zone());
240       parts_[i].data = substring_index;
241       substring_index++;
242     }
243   }
244   return false;
245 }
246 
247 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)248 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
249                                 int match_from, int match_to, int32_t* match) {
250   DCHECK_LT(0, parts_.length());
251   for (int i = 0, n = parts_.length(); i < n; i++) {
252     ReplacementPart part = parts_[i];
253     switch (part.tag) {
254       case SUBJECT_PREFIX:
255         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
256         break;
257       case SUBJECT_SUFFIX: {
258         int subject_length = part.data;
259         if (match_to < subject_length) {
260           builder->AddSubjectSlice(match_to, subject_length);
261         }
262         break;
263       }
264       case SUBJECT_CAPTURE: {
265         int capture = part.data;
266         int from = match[capture * 2];
267         int to = match[capture * 2 + 1];
268         if (from >= 0 && to > from) {
269           builder->AddSubjectSlice(from, to);
270         }
271         break;
272       }
273       case REPLACEMENT_SUBSTRING:
274       case REPLACEMENT_STRING:
275         builder->AddString(replacement_substrings_[part.data]);
276         break;
277       default:
278         UNREACHABLE();
279     }
280   }
281 }
282 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,List<int> * indices,unsigned int limit)283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284                               List<int>* indices, unsigned int limit) {
285   DCHECK(limit > 0);
286   // Collect indices of pattern in subject using memchr.
287   // Stop after finding at most limit values.
288   const uint8_t* subject_start = subject.start();
289   const uint8_t* subject_end = subject_start + subject.length();
290   const uint8_t* pos = subject_start;
291   while (limit > 0) {
292     pos = reinterpret_cast<const uint8_t*>(
293         memchr(pos, pattern, subject_end - pos));
294     if (pos == NULL) return;
295     indices->Add(static_cast<int>(pos - subject_start));
296     pos++;
297     limit--;
298   }
299 }
300 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,List<int> * indices,unsigned int limit)301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302                               List<int>* indices, unsigned int limit) {
303   DCHECK(limit > 0);
304   const uc16* subject_start = subject.start();
305   const uc16* subject_end = subject_start + subject.length();
306   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
307     if (*pos == pattern) {
308       indices->Add(static_cast<int>(pos - subject_start));
309       limit--;
310     }
311   }
312 }
313 
314 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,List<int> * indices,unsigned int limit)315 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
316                        Vector<const PatternChar> pattern, List<int>* indices,
317                        unsigned int limit) {
318   DCHECK(limit > 0);
319   // Collect indices of pattern in subject.
320   // Stop after finding at most limit values.
321   int pattern_length = pattern.length();
322   int index = 0;
323   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
324   while (limit > 0) {
325     index = search.Search(subject, index);
326     if (index < 0) return;
327     indices->Add(index);
328     index += pattern_length;
329     limit--;
330   }
331 }
332 
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,List<int> * indices,unsigned int limit)333 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
334                                String* pattern, List<int>* indices,
335                                unsigned int limit) {
336   {
337     DisallowHeapAllocation no_gc;
338     String::FlatContent subject_content = subject->GetFlatContent();
339     String::FlatContent pattern_content = pattern->GetFlatContent();
340     DCHECK(subject_content.IsFlat());
341     DCHECK(pattern_content.IsFlat());
342     if (subject_content.IsOneByte()) {
343       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
344       if (pattern_content.IsOneByte()) {
345         Vector<const uint8_t> pattern_vector =
346             pattern_content.ToOneByteVector();
347         if (pattern_vector.length() == 1) {
348           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
349                                    limit);
350         } else {
351           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
352                             limit);
353         }
354       } else {
355         FindStringIndices(isolate, subject_vector,
356                           pattern_content.ToUC16Vector(), indices, limit);
357       }
358     } else {
359       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
360       if (pattern_content.IsOneByte()) {
361         Vector<const uint8_t> pattern_vector =
362             pattern_content.ToOneByteVector();
363         if (pattern_vector.length() == 1) {
364           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
365                                    limit);
366         } else {
367           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
368                             limit);
369         }
370       } else {
371         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
372         if (pattern_vector.length() == 1) {
373           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
374                                    limit);
375         } else {
376           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
377                             limit);
378         }
379       }
380     }
381   }
382 }
383 
384 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)385 List<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
386   List<int>* list = isolate->regexp_indices();
387   list->Rewind(0);
388   return list;
389 }
390 
TruncateRegexpIndicesList(Isolate * isolate)391 void TruncateRegexpIndicesList(Isolate* isolate) {
392   // Same size as smallest zone segment, preserving behavior from the
393   // runtime zone.
394   static const int kMaxRegexpIndicesListCapacity = 8 * KB;
395   if (isolate->regexp_indices()->capacity() > kMaxRegexpIndicesListCapacity) {
396     isolate->regexp_indices()->Clear();  //  Throw away backing storage
397   }
398 }
399 }  // namespace
400 
401 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)402 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
403     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
404     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
405   DCHECK(subject->IsFlat());
406   DCHECK(replacement->IsFlat());
407 
408   List<int>* indices = GetRewoundRegexpIndicesList(isolate);
409 
410   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
411   String* pattern =
412       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
413   int subject_len = subject->length();
414   int pattern_len = pattern->length();
415   int replacement_len = replacement->length();
416 
417   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff);
418 
419   int matches = indices->length();
420   if (matches == 0) return *subject;
421 
422   // Detect integer overflow.
423   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
424                            static_cast<int64_t>(pattern_len)) *
425                               static_cast<int64_t>(matches) +
426                           static_cast<int64_t>(subject_len);
427   int result_len;
428   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
429     STATIC_ASSERT(String::kMaxLength < kMaxInt);
430     result_len = kMaxInt;  // Provoke exception.
431   } else {
432     result_len = static_cast<int>(result_len_64);
433   }
434 
435   int subject_pos = 0;
436   int result_pos = 0;
437 
438   MaybeHandle<SeqString> maybe_res;
439   if (ResultSeqString::kHasOneByteEncoding) {
440     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
441   } else {
442     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
443   }
444   Handle<SeqString> untyped_res;
445   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
446   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
447 
448   for (int i = 0; i < matches; i++) {
449     // Copy non-matched subject content.
450     if (subject_pos < indices->at(i)) {
451       String::WriteToFlat(*subject, result->GetChars() + result_pos,
452                           subject_pos, indices->at(i));
453       result_pos += indices->at(i) - subject_pos;
454     }
455 
456     // Replace match.
457     if (replacement_len > 0) {
458       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
459                           replacement_len);
460       result_pos += replacement_len;
461     }
462 
463     subject_pos = indices->at(i) + pattern_len;
464   }
465   // Add remaining subject content at the end.
466   if (subject_pos < subject_len) {
467     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
468                         subject_len);
469   }
470 
471   int32_t match_indices[] = {indices->at(matches - 1),
472                              indices->at(matches - 1) + pattern_len};
473   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
474 
475   TruncateRegexpIndicesList(isolate);
476 
477   return *result;
478 }
479 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)480 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
481     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
482     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
483   DCHECK(subject->IsFlat());
484   DCHECK(replacement->IsFlat());
485 
486   int capture_count = regexp->CaptureCount();
487   int subject_length = subject->length();
488 
489   // CompiledReplacement uses zone allocation.
490   Zone zone(isolate->allocator(), ZONE_NAME);
491   CompiledReplacement compiled_replacement(&zone);
492   bool simple_replace =
493       compiled_replacement.Compile(replacement, capture_count, subject_length);
494 
495   // Shortcut for simple non-regexp global replacements
496   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
497     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
498       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
499           isolate, subject, regexp, replacement, last_match_info);
500     } else {
501       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
502           isolate, subject, regexp, replacement, last_match_info);
503     }
504   }
505 
506   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
507   if (global_cache.HasException()) return isolate->heap()->exception();
508 
509   int32_t* current_match = global_cache.FetchNext();
510   if (current_match == NULL) {
511     if (global_cache.HasException()) return isolate->heap()->exception();
512     return *subject;
513   }
514 
515   // Guessing the number of parts that the final result string is built
516   // from. Global regexps can match any number of times, so we guess
517   // conservatively.
518   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
519   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
520 
521   // Number of parts added by compiled replacement plus preceeding
522   // string and possibly suffix after last match.  It is possible for
523   // all components to use two elements when encoded as two smis.
524   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
525 
526   int prev = 0;
527 
528   do {
529     builder.EnsureCapacity(parts_added_per_loop);
530 
531     int start = current_match[0];
532     int end = current_match[1];
533 
534     if (prev < start) {
535       builder.AddSubjectSlice(prev, start);
536     }
537 
538     if (simple_replace) {
539       builder.AddString(replacement);
540     } else {
541       compiled_replacement.Apply(&builder, start, end, current_match);
542     }
543     prev = end;
544 
545     current_match = global_cache.FetchNext();
546   } while (current_match != NULL);
547 
548   if (global_cache.HasException()) return isolate->heap()->exception();
549 
550   if (prev < subject_length) {
551     builder.EnsureCapacity(2);
552     builder.AddSubjectSlice(prev, subject_length);
553   }
554 
555   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
556                                global_cache.LastSuccessfulMatch());
557 
558   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
559 }
560 
561 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)562 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
563     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
564     Handle<RegExpMatchInfo> last_match_info) {
565   DCHECK(subject->IsFlat());
566 
567   // Shortcut for simple non-regexp global replacements
568   if (regexp->TypeTag() == JSRegExp::ATOM) {
569     Handle<String> empty_string = isolate->factory()->empty_string();
570     if (subject->IsOneByteRepresentation()) {
571       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
572           isolate, subject, regexp, empty_string, last_match_info);
573     } else {
574       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
575           isolate, subject, regexp, empty_string, last_match_info);
576     }
577   }
578 
579   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
580   if (global_cache.HasException()) return isolate->heap()->exception();
581 
582   int32_t* current_match = global_cache.FetchNext();
583   if (current_match == NULL) {
584     if (global_cache.HasException()) return isolate->heap()->exception();
585     return *subject;
586   }
587 
588   int start = current_match[0];
589   int end = current_match[1];
590   int capture_count = regexp->CaptureCount();
591   int subject_length = subject->length();
592 
593   int new_length = subject_length - (end - start);
594   if (new_length == 0) return isolate->heap()->empty_string();
595 
596   Handle<ResultSeqString> answer;
597   if (ResultSeqString::kHasOneByteEncoding) {
598     answer = Handle<ResultSeqString>::cast(
599         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
600   } else {
601     answer = Handle<ResultSeqString>::cast(
602         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
603   }
604 
605   int prev = 0;
606   int position = 0;
607 
608   do {
609     start = current_match[0];
610     end = current_match[1];
611     if (prev < start) {
612       // Add substring subject[prev;start] to answer string.
613       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
614       position += start - prev;
615     }
616     prev = end;
617 
618     current_match = global_cache.FetchNext();
619   } while (current_match != NULL);
620 
621   if (global_cache.HasException()) return isolate->heap()->exception();
622 
623   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
624                                global_cache.LastSuccessfulMatch());
625 
626   if (prev < subject_length) {
627     // Add substring subject[prev;length] to answer string.
628     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
629                         subject_length);
630     position += subject_length - prev;
631   }
632 
633   if (position == 0) return isolate->heap()->empty_string();
634 
635   // Shorten string and fill
636   int string_size = ResultSeqString::SizeFor(position);
637   int allocated_string_size = ResultSeqString::SizeFor(new_length);
638   int delta = allocated_string_size - string_size;
639 
640   answer->set_length(position);
641   if (delta == 0) return *answer;
642 
643   Address end_of_string = answer->address() + string_size;
644   Heap* heap = isolate->heap();
645 
646   // The trimming is performed on a newly allocated object, which is on a
647   // fresly allocated page or on an already swept page. Hence, the sweeper
648   // thread can not get confused with the filler creation. No synchronization
649   // needed.
650   // TODO(hpayer): We should shrink the large object page if the size
651   // of the object changed significantly.
652   if (!heap->lo_space()->Contains(*answer)) {
653     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
654   }
655   heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
656   return *answer;
657 }
658 
659 namespace {
660 
StringReplaceGlobalRegExpWithStringHelper(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)661 Object* StringReplaceGlobalRegExpWithStringHelper(
662     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
663     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
664   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
665 
666   subject = String::Flatten(subject);
667 
668   if (replacement->length() == 0) {
669     if (subject->HasOnlyOneByteChars()) {
670       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
671           isolate, subject, regexp, last_match_info);
672     } else {
673       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
674           isolate, subject, regexp, last_match_info);
675     }
676   }
677 
678   replacement = String::Flatten(replacement);
679 
680   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
681                                              replacement, last_match_info);
682 }
683 
684 }  // namespace
685 
// Runtime entry.  Arguments: 0 = subject string, 1 = regexp, 2 = replacement
// string, 3 = last match info.  Forwards to
// StringReplaceGlobalRegExpWithStringHelper.
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 4);

  // The arguments are converted (and type-checked) in this order.
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);

  Object* const replaced = StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement, last_match_info);
  return replaced;
}
698 
RUNTIME_FUNCTION(Runtime_StringSplit)699 RUNTIME_FUNCTION(Runtime_StringSplit) {
700   HandleScope handle_scope(isolate);
701   DCHECK(args.length() == 3);
702   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
703   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
704   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
705   CHECK(limit > 0);
706 
707   int subject_length = subject->length();
708   int pattern_length = pattern->length();
709   CHECK(pattern_length > 0);
710 
711   if (limit == 0xffffffffu) {
712     FixedArray* last_match_cache_unused;
713     Handle<Object> cached_answer(
714         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
715                                    &last_match_cache_unused,
716                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
717         isolate);
718     if (*cached_answer != Smi::kZero) {
719       // The cache FixedArray is a COW-array and can therefore be reused.
720       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
721           Handle<FixedArray>::cast(cached_answer));
722       return *result;
723     }
724   }
725 
726   // The limit can be very large (0xffffffffu), but since the pattern
727   // isn't empty, we can never create more parts than ~half the length
728   // of the subject.
729 
730   subject = String::Flatten(subject);
731   pattern = String::Flatten(pattern);
732 
733   List<int>* indices = GetRewoundRegexpIndicesList(isolate);
734 
735   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
736 
737   if (static_cast<uint32_t>(indices->length()) < limit) {
738     indices->Add(subject_length);
739   }
740 
741   // The list indices now contains the end of each part to create.
742 
743   // Create JSArray of substrings separated by separator.
744   int part_count = indices->length();
745 
746   Handle<JSArray> result =
747       isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
748                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
749 
750   DCHECK(result->HasFastObjectElements());
751 
752   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
753 
754   if (part_count == 1 && indices->at(0) == subject_length) {
755     elements->set(0, *subject);
756   } else {
757     int part_start = 0;
758     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
759       int part_end = indices->at(i);
760       Handle<String> substring =
761           isolate->factory()->NewProperSubString(subject, part_start, part_end);
762       elements->set(i, *substring);
763       part_start = part_end + pattern_length;
764     });
765   }
766 
767   if (limit == 0xffffffffu) {
768     if (result->HasFastObjectElements()) {
769       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
770                                 isolate->factory()->empty_fixed_array(),
771                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
772     }
773   }
774 
775   TruncateRegexpIndicesList(isolate);
776 
777   return *result;
778 }
779 
780 // ES##sec-regexpcreate
781 // RegExpCreate ( P, F )
RUNTIME_FUNCTION(Runtime_RegExpCreate)782 RUNTIME_FUNCTION(Runtime_RegExpCreate) {
783   HandleScope scope(isolate);
784   DCHECK(args.length() == 1);
785   CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0);
786 
787   Handle<String> source;
788   if (source_object->IsUndefined(isolate)) {
789     source = isolate->factory()->empty_string();
790   } else {
791     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
792         isolate, source, Object::ToString(isolate, source_object));
793   }
794 
795   Handle<Map> map(isolate->regexp_function()->initial_map());
796   Handle<JSRegExp> regexp =
797       Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map));
798 
799   JSRegExp::Flags flags = JSRegExp::kNone;
800 
801   RETURN_FAILURE_ON_EXCEPTION(isolate,
802                               JSRegExp::Initialize(regexp, source, flags));
803 
804   return *regexp;
805 }
806 
// Runtime entry point that executes |regexp| (arg 0) on |subject| (arg 1)
// starting at |index| (arg 2), passing |last_match_info| (arg 3) through to
// RegExpImpl::Exec. Returns whatever RegExpImpl::Exec produces, or a failure
// if it throws.
RUNTIME_FUNCTION(Runtime_RegExpExec) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 4);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);

  // The JS callers are constructed so that the index never exceeds the length
  // of a string (and is therefore always a Smi). The bounds are still
  // enforced here for security.
  CHECK(index >= 0);
  CHECK(index <= subject->length());

  isolate->counters()->regexp_entry_runtime()->Increment();

  auto exec_result = RegExpImpl::Exec(regexp, subject, index, last_match_info);
  RETURN_RESULT_OR_FAILURE(isolate, exec_result);
}
822 
// Runtime entry point: replaces matches of |regexp| (arg 0) in |subject|
// (arg 1) with the |replacement| string (arg 2) by delegating to
// StringReplaceGlobalRegExpWithStringHelper. Match state is recorded in the
// isolate's internal match info (regexp_internal_match_info) rather than in a
// caller-supplied one.
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 3);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

  return StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement,
      isolate->regexp_internal_match_info());
}
836 
837 namespace {
838 
839 class MatchInfoBackedMatch : public String::Match {
840  public:
MatchInfoBackedMatch(Isolate * isolate,Handle<String> subject,Handle<RegExpMatchInfo> match_info)841   MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
842                        Handle<RegExpMatchInfo> match_info)
843       : isolate_(isolate), match_info_(match_info) {
844     subject_ = String::Flatten(subject);
845   }
846 
GetMatch()847   Handle<String> GetMatch() override {
848     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
849   }
850 
GetCapture(int i,bool * capture_exists)851   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
852     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
853         isolate_, match_info_, i, capture_exists);
854     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
855                              : isolate_->factory()->empty_string();
856   }
857 
GetPrefix()858   Handle<String> GetPrefix() override {
859     const int match_start = match_info_->Capture(0);
860     return isolate_->factory()->NewSubString(subject_, 0, match_start);
861   }
862 
GetSuffix()863   Handle<String> GetSuffix() override {
864     const int match_end = match_info_->Capture(1);
865     return isolate_->factory()->NewSubString(subject_, match_end,
866                                              subject_->length());
867   }
868 
CaptureCount()869   int CaptureCount() override {
870     return match_info_->NumberOfCaptureRegisters() / 2;
871   }
872 
~MatchInfoBackedMatch()873   virtual ~MatchInfoBackedMatch() {}
874 
875  private:
876   Isolate* isolate_;
877   Handle<String> subject_;
878   Handle<RegExpMatchInfo> match_info_;
879 };
880 
881 class VectorBackedMatch : public String::Match {
882  public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures)883   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
884                     Handle<String> match, int match_position,
885                     ZoneVector<Handle<Object>>* captures)
886       : isolate_(isolate),
887         match_(match),
888         match_position_(match_position),
889         captures_(captures) {
890     subject_ = String::Flatten(subject);
891   }
892 
GetMatch()893   Handle<String> GetMatch() override { return match_; }
894 
GetCapture(int i,bool * capture_exists)895   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
896     Handle<Object> capture_obj = captures_->at(i);
897     if (capture_obj->IsUndefined(isolate_)) {
898       *capture_exists = false;
899       return isolate_->factory()->empty_string();
900     }
901     *capture_exists = true;
902     return Object::ToString(isolate_, capture_obj);
903   }
904 
GetPrefix()905   Handle<String> GetPrefix() override {
906     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
907   }
908 
GetSuffix()909   Handle<String> GetSuffix() override {
910     const int match_end_position = match_position_ + match_->length();
911     return isolate_->factory()->NewSubString(subject_, match_end_position,
912                                              subject_->length());
913   }
914 
CaptureCount()915   int CaptureCount() override { return static_cast<int>(captures_->size()); }
916 
~VectorBackedMatch()917   virtual ~VectorBackedMatch() {}
918 
919  private:
920   Isolate* isolate_;
921   Handle<String> subject_;
922   Handle<String> match_;
923   const int match_position_;
924   ZoneVector<Handle<Object>>* captures_;
925 };
926 
927 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
928 // separate last match info.  See comment on that function.
929 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)930 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
931                                     Handle<JSRegExp> regexp,
932                                     Handle<RegExpMatchInfo> last_match_array,
933                                     Handle<JSArray> result_array) {
934   DCHECK(subject->IsFlat());
935   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
936 
937   int capture_count = regexp->CaptureCount();
938   int subject_length = subject->length();
939 
940   static const int kMinLengthToCache = 0x1000;
941 
942   if (subject_length > kMinLengthToCache) {
943     FixedArray* last_match_cache;
944     Object* cached_answer = RegExpResultsCache::Lookup(
945         isolate->heap(), *subject, regexp->data(), &last_match_cache,
946         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
947     if (cached_answer->IsFixedArray()) {
948       int capture_registers = (capture_count + 1) * 2;
949       int32_t* last_match = NewArray<int32_t>(capture_registers);
950       for (int i = 0; i < capture_registers; i++) {
951         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
952       }
953       Handle<FixedArray> cached_fixed_array =
954           Handle<FixedArray>(FixedArray::cast(cached_answer));
955       // The cache FixedArray is a COW-array and we need to return a copy.
956       Handle<FixedArray> copied_fixed_array =
957           isolate->factory()->CopyFixedArrayWithMap(
958               cached_fixed_array, isolate->factory()->fixed_array_map());
959       JSArray::SetContent(result_array, copied_fixed_array);
960       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
961                                    last_match);
962       DeleteArray(last_match);
963       return *result_array;
964     }
965   }
966 
967   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
968   if (global_cache.HasException()) return isolate->heap()->exception();
969 
970   // Ensured in Runtime_RegExpExecMultiple.
971   DCHECK(result_array->HasFastObjectElements());
972   Handle<FixedArray> result_elements(
973       FixedArray::cast(result_array->elements()));
974   if (result_elements->length() < 16) {
975     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
976   }
977 
978   FixedArrayBuilder builder(result_elements);
979 
980   // Position to search from.
981   int match_start = -1;
982   int match_end = 0;
983   bool first = true;
984 
985   // Two smis before and after the match, for very long strings.
986   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
987 
988   while (true) {
989     int32_t* current_match = global_cache.FetchNext();
990     if (current_match == NULL) break;
991     match_start = current_match[0];
992     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
993     if (match_end < match_start) {
994       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
995                                                 match_start);
996     }
997     match_end = current_match[1];
998     {
999       // Avoid accumulating new handles inside loop.
1000       HandleScope temp_scope(isolate);
1001       Handle<String> match;
1002       if (!first) {
1003         match = isolate->factory()->NewProperSubString(subject, match_start,
1004                                                        match_end);
1005       } else {
1006         match =
1007             isolate->factory()->NewSubString(subject, match_start, match_end);
1008         first = false;
1009       }
1010 
1011       if (has_capture) {
1012         // Arguments array to replace function is match, captures, index and
1013         // subject, i.e., 3 + capture count in total.
1014         Handle<FixedArray> elements =
1015             isolate->factory()->NewFixedArray(3 + capture_count);
1016 
1017         elements->set(0, *match);
1018         for (int i = 1; i <= capture_count; i++) {
1019           int start = current_match[i * 2];
1020           if (start >= 0) {
1021             int end = current_match[i * 2 + 1];
1022             DCHECK(start <= end);
1023             Handle<String> substring =
1024                 isolate->factory()->NewSubString(subject, start, end);
1025             elements->set(i, *substring);
1026           } else {
1027             DCHECK(current_match[i * 2 + 1] < 0);
1028             elements->set(i, isolate->heap()->undefined_value());
1029           }
1030         }
1031         elements->set(capture_count + 1, Smi::FromInt(match_start));
1032         elements->set(capture_count + 2, *subject);
1033         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1034       } else {
1035         builder.Add(*match);
1036       }
1037     }
1038   }
1039 
1040   if (global_cache.HasException()) return isolate->heap()->exception();
1041 
1042   if (match_start >= 0) {
1043     // Finished matching, with at least one match.
1044     if (match_end < subject_length) {
1045       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1046                                                 subject_length);
1047     }
1048 
1049     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1050                                  global_cache.LastSuccessfulMatch());
1051 
1052     if (subject_length > kMinLengthToCache) {
1053       // Store the last successful match into the array for caching.
1054       // TODO(yangguo): do not expose last match to JS and simplify caching.
1055       int capture_registers = (capture_count + 1) * 2;
1056       Handle<FixedArray> last_match_cache =
1057           isolate->factory()->NewFixedArray(capture_registers);
1058       int32_t* last_match = global_cache.LastSuccessfulMatch();
1059       for (int i = 0; i < capture_registers; i++) {
1060         last_match_cache->set(i, Smi::FromInt(last_match[i]));
1061       }
1062       Handle<FixedArray> result_fixed_array = builder.array();
1063       result_fixed_array->Shrink(builder.length());
1064       // Cache the result and copy the FixedArray into a COW array.
1065       Handle<FixedArray> copied_fixed_array =
1066           isolate->factory()->CopyFixedArrayWithMap(
1067               result_fixed_array, isolate->factory()->fixed_array_map());
1068       RegExpResultsCache::Enter(
1069           isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1070           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1071     }
1072     return *builder.ToJSArray(result_array);
1073   } else {
1074     return isolate->heap()->null_value();  // No matches at all.
1075   }
1076 }
1077 
StringReplaceNonGlobalRegExpWithFunction(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<Object> replace_obj)1078 MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
1079     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
1080     Handle<Object> replace_obj) {
1081   Factory* factory = isolate->factory();
1082   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1083 
1084   // TODO(jgruber): This is a pattern we could refactor.
1085   Handle<Object> match_indices_obj;
1086   ASSIGN_RETURN_ON_EXCEPTION(
1087       isolate, match_indices_obj,
1088       RegExpImpl::Exec(regexp, subject, 0, last_match_info), String);
1089 
1090   if (match_indices_obj->IsNull(isolate)) {
1091     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1092                         String);
1093     return subject;
1094   }
1095 
1096   Handle<RegExpMatchInfo> match_indices =
1097       Handle<RegExpMatchInfo>::cast(match_indices_obj);
1098 
1099   const int index = match_indices->Capture(0);
1100   const int end_of_match = match_indices->Capture(1);
1101 
1102   IncrementalStringBuilder builder(isolate);
1103   builder.AppendString(factory->NewSubString(subject, 0, index));
1104 
1105   // Compute the parameter list consisting of the match, captures, index,
1106   // and subject for the replace function invocation.
1107   // The number of captures plus one for the match.
1108   const int m = match_indices->NumberOfCaptureRegisters() / 2;
1109 
1110   const int argc = m + 2;
1111   ScopedVector<Handle<Object>> argv(argc);
1112 
1113   for (int j = 0; j < m; j++) {
1114     bool ok;
1115     Handle<String> capture =
1116         RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1117     if (ok) {
1118       argv[j] = capture;
1119     } else {
1120       argv[j] = factory->undefined_value();
1121     }
1122   }
1123 
1124   argv[argc - 2] = handle(Smi::FromInt(index), isolate);
1125   argv[argc - 1] = subject;
1126 
1127   Handle<Object> replacement_obj;
1128   ASSIGN_RETURN_ON_EXCEPTION(
1129       isolate, replacement_obj,
1130       Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1131                       argv.start()),
1132       String);
1133 
1134   Handle<String> replacement;
1135   ASSIGN_RETURN_ON_EXCEPTION(
1136       isolate, replacement, Object::ToString(isolate, replacement_obj), String);
1137 
1138   builder.AppendString(replacement);
1139   builder.AppendString(
1140       factory->NewSubString(subject, end_of_match, subject->length()));
1141 
1142   return builder.Finish();
1143 }
1144 
1145 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1146 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<Object> replace_obj)1147 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1148                                                   Handle<JSRegExp> regexp,
1149                                                   Handle<String> string,
1150                                                   Handle<Object> replace_obj) {
1151   Factory* factory = isolate->factory();
1152 
1153   // TODO(jgruber): We need the even stricter guarantee of an unmodified
1154   // JSRegExp map here for access to GetFlags to be legal.
1155   const int flags = regexp->GetFlags();
1156   const bool global = (flags & JSRegExp::kGlobal) != 0;
1157 
1158   // Functional fast-paths are dispatched directly by replace builtin.
1159   DCHECK(!replace_obj->IsCallable());
1160 
1161   Handle<String> replace;
1162   ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1163                              Object::ToString(isolate, replace_obj), String);
1164   replace = String::Flatten(replace);
1165 
1166   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1167 
1168   if (!global) {
1169     // Non-global regexp search, string replace.
1170 
1171     Handle<Object> match_indices_obj;
1172     ASSIGN_RETURN_ON_EXCEPTION(
1173         isolate, match_indices_obj,
1174         RegExpImpl::Exec(regexp, string, 0, last_match_info), String);
1175 
1176     if (match_indices_obj->IsNull(isolate)) {
1177       RETURN_ON_EXCEPTION(
1178           isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String);
1179       return string;
1180     }
1181 
1182     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1183 
1184     const int start_index = match_indices->Capture(0);
1185     const int end_index = match_indices->Capture(1);
1186 
1187     IncrementalStringBuilder builder(isolate);
1188     builder.AppendString(factory->NewSubString(string, 0, start_index));
1189 
1190     if (replace->length() > 0) {
1191       MatchInfoBackedMatch m(isolate, string, match_indices);
1192       Handle<String> replacement;
1193       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1194                                  String::GetSubstitution(isolate, &m, replace),
1195                                  String);
1196       builder.AppendString(replacement);
1197     }
1198 
1199     builder.AppendString(
1200         factory->NewSubString(string, end_index, string->length()));
1201     return builder.Finish();
1202   } else {
1203     // Global regexp search, string replace.
1204     DCHECK(global);
1205     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1206                         String);
1207 
1208     if (replace->length() == 0) {
1209       if (string->HasOnlyOneByteChars()) {
1210         Object* result =
1211             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1212                 isolate, string, regexp, last_match_info);
1213         return handle(String::cast(result), isolate);
1214       } else {
1215         Object* result =
1216             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1217                 isolate, string, regexp, last_match_info);
1218         return handle(String::cast(result), isolate);
1219       }
1220     }
1221 
1222     Object* result = StringReplaceGlobalRegExpWithString(
1223         isolate, string, regexp, replace, last_match_info);
1224     if (result->IsString()) {
1225       return handle(String::cast(result), isolate);
1226     } else {
1227       return MaybeHandle<String>();
1228     }
1229   }
1230 
1231   UNREACHABLE();
1232   return MaybeHandle<String>();
1233 }
1234 
1235 }  // namespace
1236 
1237 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1238 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1239   HandleScope handles(isolate);
1240   DCHECK(args.length() == 4);
1241 
1242   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1243   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1244   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1245   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1246   CHECK(result_array->HasFastObjectElements());
1247 
1248   subject = String::Flatten(subject);
1249   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1250 
1251   if (regexp->CaptureCount() == 0) {
1252     return SearchRegExpMultiple<false>(isolate, subject, regexp,
1253                                        last_match_info, result_array);
1254   } else {
1255     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1256                                       result_array);
1257   }
1258 }
1259 
// Runtime entry point wrapping StringReplaceNonGlobalRegExpWithFunction.
// Arguments: the subject string, the regexp, and the replace object (checked
// to be a JSObject). Returns the replaced string, or a failure if the helper
// leaves an exception pending.
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2);

  MaybeHandle<String> replaced = StringReplaceNonGlobalRegExpWithFunction(
      isolate, subject, regexp, replace);
  RETURN_RESULT_OR_FAILURE(isolate, replaced);
}
1271 
1272 // Slow path for:
1273 // ES#sec-regexp.prototype-@@replace
1274 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplace)1275 RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1276   HandleScope scope(isolate);
1277   DCHECK(args.length() == 3);
1278 
1279   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1280   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1281   Handle<Object> replace_obj = args.at<Object>(2);
1282 
1283   Factory* factory = isolate->factory();
1284 
1285   string = String::Flatten(string);
1286 
1287   // Fast-path for unmodified JSRegExps.
1288   if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1289     RETURN_RESULT_OR_FAILURE(
1290         isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1291                                replace_obj));
1292   }
1293 
1294   const int length = string->length();
1295   const bool functional_replace = replace_obj->IsCallable();
1296 
1297   Handle<String> replace;
1298   if (!functional_replace) {
1299     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1300                                        Object::ToString(isolate, replace_obj));
1301   }
1302 
1303   Handle<Object> global_obj;
1304   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1305       isolate, global_obj,
1306       JSReceiver::GetProperty(recv, factory->global_string()));
1307   const bool global = global_obj->BooleanValue();
1308 
1309   bool unicode = false;
1310   if (global) {
1311     Handle<Object> unicode_obj;
1312     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1313         isolate, unicode_obj,
1314         JSReceiver::GetProperty(recv, factory->unicode_string()));
1315     unicode = unicode_obj->BooleanValue();
1316 
1317     RETURN_FAILURE_ON_EXCEPTION(isolate,
1318                                 RegExpUtils::SetLastIndex(isolate, recv, 0));
1319   }
1320 
1321   Zone zone(isolate->allocator(), ZONE_NAME);
1322   ZoneVector<Handle<Object>> results(&zone);
1323 
1324   while (true) {
1325     Handle<Object> result;
1326     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1327         isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1328                                                  factory->undefined_value()));
1329 
1330     if (result->IsNull(isolate)) break;
1331 
1332     results.push_back(result);
1333     if (!global) break;
1334 
1335     Handle<Object> match_obj;
1336     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1337                                        Object::GetElement(isolate, result, 0));
1338 
1339     Handle<String> match;
1340     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1341                                        Object::ToString(isolate, match_obj));
1342 
1343     if (match->length() == 0) {
1344       RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1345                                                isolate, recv, string, unicode));
1346     }
1347   }
1348 
1349   // TODO(jgruber): Look into ReplacementStringBuilder instead.
1350   IncrementalStringBuilder builder(isolate);
1351   int next_source_position = 0;
1352 
1353   for (const auto& result : results) {
1354     Handle<Object> captures_length_obj;
1355     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1356         isolate, captures_length_obj,
1357         Object::GetProperty(result, factory->length_string()));
1358 
1359     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1360         isolate, captures_length_obj,
1361         Object::ToLength(isolate, captures_length_obj));
1362     const int captures_length =
1363         std::max(Handle<Smi>::cast(captures_length_obj)->value(), 0);
1364 
1365     Handle<Object> match_obj;
1366     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1367                                        Object::GetElement(isolate, result, 0));
1368 
1369     Handle<String> match;
1370     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1371                                        Object::ToString(isolate, match_obj));
1372 
1373     const int match_length = match->length();
1374 
1375     Handle<Object> position_obj;
1376     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1377         isolate, position_obj,
1378         Object::GetProperty(result, factory->index_string()));
1379 
1380     // TODO(jgruber): Extract and correct error handling. Since we can go up to
1381     // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
1382     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1383         isolate, position_obj, Object::ToInteger(isolate, position_obj));
1384     const int position =
1385         std::max(std::min(Handle<Smi>::cast(position_obj)->value(), length), 0);
1386 
1387     ZoneVector<Handle<Object>> captures(&zone);
1388     for (int n = 0; n < captures_length; n++) {
1389       Handle<Object> capture;
1390       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1391           isolate, capture, Object::GetElement(isolate, result, n));
1392 
1393       if (!capture->IsUndefined(isolate)) {
1394         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1395                                            Object::ToString(isolate, capture));
1396       }
1397       captures.push_back(capture);
1398     }
1399 
1400     Handle<String> replacement;
1401     if (functional_replace) {
1402       const int argc = captures_length + 2;
1403       ScopedVector<Handle<Object>> argv(argc);
1404 
1405       for (int j = 0; j < captures_length; j++) {
1406         argv[j] = captures[j];
1407       }
1408 
1409       argv[captures_length] = handle(Smi::FromInt(position), isolate);
1410       argv[captures_length + 1] = string;
1411 
1412       Handle<Object> replacement_obj;
1413       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1414           isolate, replacement_obj,
1415           Execution::Call(isolate, replace_obj, factory->undefined_value(),
1416                           argc, argv.start()));
1417 
1418       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1419           isolate, replacement, Object::ToString(isolate, replacement_obj));
1420     } else {
1421       VectorBackedMatch m(isolate, string, match, position, &captures);
1422       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1423           isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1424     }
1425 
1426     if (position >= next_source_position) {
1427       builder.AppendString(
1428           factory->NewSubString(string, next_source_position, position));
1429       builder.AppendString(replacement);
1430 
1431       next_source_position = position + match_length;
1432     }
1433   }
1434 
1435   if (next_source_position < length) {
1436     builder.AppendString(
1437         factory->NewSubString(string, next_source_position, length));
1438   }
1439 
1440   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1441 }
1442 
// Runtime entry point that takes the isolate's pending exception, clears the
// pending state, and rethrows that same exception. The four arguments are
// only counted, never read.
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
  SealHandleScope shs(isolate);
  DCHECK(args.length() == 4);
  // Grab the exception before clearing the pending state.
  Object* const pending = isolate->pending_exception();
  isolate->clear_pending_exception();
  return isolate->ReThrow(pending);
}
1450 
1451 
// Runtime entry point returning the heap's boolean for whether the single
// argument is a JSRegExp.
RUNTIME_FUNCTION(Runtime_IsRegExp) {
  SealHandleScope shs(isolate);
  DCHECK(args.length() == 1);
  CONVERT_ARG_CHECKED(Object, obj, 0);
  const bool is_regexp = obj->IsJSRegExp();
  return isolate->heap()->ToBoolean(is_regexp);
}
1458 
1459 }  // namespace internal
1460 }  // namespace v8
1461