• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/runtime/runtime-utils.h"
6 
7 #include "src/arguments.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h"
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder.h"
15 #include "src/string-search.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 class CompiledReplacement {
21  public:
CompiledReplacement(Zone * zone)22   explicit CompiledReplacement(Zone* zone)
23       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
24 
25   // Return whether the replacement is simple.
26   bool Compile(Handle<String> replacement, int capture_count,
27                int subject_length);
28 
29   // Use Apply only if Compile returned false.
30   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
31              int32_t* match);
32 
33   // Number of distinct parts of the replacement pattern.
parts()34   int parts() { return parts_.length(); }
35 
zone() const36   Zone* zone() const { return zone_; }
37 
38  private:
39   enum PartType {
40     SUBJECT_PREFIX = 1,
41     SUBJECT_SUFFIX,
42     SUBJECT_CAPTURE,
43     REPLACEMENT_SUBSTRING,
44     REPLACEMENT_STRING,
45     NUMBER_OF_PART_TYPES
46   };
47 
48   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart49     static inline ReplacementPart SubjectMatch() {
50       return ReplacementPart(SUBJECT_CAPTURE, 0);
51     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart52     static inline ReplacementPart SubjectCapture(int capture_index) {
53       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
54     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart55     static inline ReplacementPart SubjectPrefix() {
56       return ReplacementPart(SUBJECT_PREFIX, 0);
57     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart58     static inline ReplacementPart SubjectSuffix(int subject_length) {
59       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
60     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart61     static inline ReplacementPart ReplacementString() {
62       return ReplacementPart(REPLACEMENT_STRING, 0);
63     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart64     static inline ReplacementPart ReplacementSubString(int from, int to) {
65       DCHECK(from >= 0);
66       DCHECK(to > from);
67       return ReplacementPart(-from, to);
68     }
69 
70     // If tag <= 0 then it is the negation of a start index of a substring of
71     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart72     ReplacementPart(int tag, int data) : tag(tag), data(data) {
73       // Must be non-positive or a PartType value.
74       DCHECK(tag < NUMBER_OF_PART_TYPES);
75     }
76     // Either a value of PartType or a non-positive number that is
77     // the negation of an index into the replacement string.
78     int tag;
79     // The data value's interpretation depends on the value of tag:
80     // tag == SUBJECT_PREFIX ||
81     // tag == SUBJECT_SUFFIX:  data is unused.
82     // tag == SUBJECT_CAPTURE: data is the number of the capture.
83     // tag == REPLACEMENT_SUBSTRING ||
84     // tag == REPLACEMENT_STRING:    data is index into array of substrings
85     //                               of the replacement string.
86     // tag <= 0: Temporary representation of the substring of the replacement
87     //           string ranging over -tag .. data.
88     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
89     //           substring objects.
90     int data;
91   };
92 
93   template <typename Char>
ParseReplacementPattern(ZoneList<ReplacementPart> * parts,Vector<Char> characters,int capture_count,int subject_length,Zone * zone)94   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
95                                Vector<Char> characters, int capture_count,
96                                int subject_length, Zone* zone) {
97     int length = characters.length();
98     int last = 0;
99     for (int i = 0; i < length; i++) {
100       Char c = characters[i];
101       if (c == '$') {
102         int next_index = i + 1;
103         if (next_index == length) {  // No next character!
104           break;
105         }
106         Char c2 = characters[next_index];
107         switch (c2) {
108           case '$':
109             if (i > last) {
110               // There is a substring before. Include the first "$".
111               parts->Add(
112                   ReplacementPart::ReplacementSubString(last, next_index),
113                   zone);
114               last = next_index + 1;  // Continue after the second "$".
115             } else {
116               // Let the next substring start with the second "$".
117               last = next_index;
118             }
119             i = next_index;
120             break;
121           case '`':
122             if (i > last) {
123               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
124             }
125             parts->Add(ReplacementPart::SubjectPrefix(), zone);
126             i = next_index;
127             last = i + 1;
128             break;
129           case '\'':
130             if (i > last) {
131               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
132             }
133             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
134             i = next_index;
135             last = i + 1;
136             break;
137           case '&':
138             if (i > last) {
139               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
140             }
141             parts->Add(ReplacementPart::SubjectMatch(), zone);
142             i = next_index;
143             last = i + 1;
144             break;
145           case '0':
146           case '1':
147           case '2':
148           case '3':
149           case '4':
150           case '5':
151           case '6':
152           case '7':
153           case '8':
154           case '9': {
155             int capture_ref = c2 - '0';
156             if (capture_ref > capture_count) {
157               i = next_index;
158               continue;
159             }
160             int second_digit_index = next_index + 1;
161             if (second_digit_index < length) {
162               // Peek ahead to see if we have two digits.
163               Char c3 = characters[second_digit_index];
164               if ('0' <= c3 && c3 <= '9') {  // Double digits.
165                 int double_digit_ref = capture_ref * 10 + c3 - '0';
166                 if (double_digit_ref <= capture_count) {
167                   next_index = second_digit_index;
168                   capture_ref = double_digit_ref;
169                 }
170               }
171             }
172             if (capture_ref > 0) {
173               if (i > last) {
174                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
175                            zone);
176               }
177               DCHECK(capture_ref <= capture_count);
178               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
179               last = next_index + 1;
180             }
181             i = next_index;
182             break;
183           }
184           default:
185             i = next_index;
186             break;
187         }
188       }
189     }
190     if (length > last) {
191       if (last == 0) {
192         // Replacement is simple.  Do not use Apply to do the replacement.
193         return true;
194       } else {
195         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
196       }
197     }
198     return false;
199   }
200 
201   ZoneList<ReplacementPart> parts_;
202   ZoneList<Handle<String> > replacement_substrings_;
203   Zone* zone_;
204 };
205 
206 
Compile(Handle<String> replacement,int capture_count,int subject_length)207 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
208                                   int subject_length) {
209   {
210     DisallowHeapAllocation no_gc;
211     String::FlatContent content = replacement->GetFlatContent();
212     DCHECK(content.IsFlat());
213     bool simple = false;
214     if (content.IsOneByte()) {
215       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
216                                        capture_count, subject_length, zone());
217     } else {
218       DCHECK(content.IsTwoByte());
219       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
220                                        capture_count, subject_length, zone());
221     }
222     if (simple) return true;
223   }
224 
225   Isolate* isolate = replacement->GetIsolate();
226   // Find substrings of replacement string and create them as String objects.
227   int substring_index = 0;
228   for (int i = 0, n = parts_.length(); i < n; i++) {
229     int tag = parts_[i].tag;
230     if (tag <= 0) {  // A replacement string slice.
231       int from = -tag;
232       int to = parts_[i].data;
233       replacement_substrings_.Add(
234           isolate->factory()->NewSubString(replacement, from, to), zone());
235       parts_[i].tag = REPLACEMENT_SUBSTRING;
236       parts_[i].data = substring_index;
237       substring_index++;
238     } else if (tag == REPLACEMENT_STRING) {
239       replacement_substrings_.Add(replacement, zone());
240       parts_[i].data = substring_index;
241       substring_index++;
242     }
243   }
244   return false;
245 }
246 
247 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)248 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
249                                 int match_from, int match_to, int32_t* match) {
250   DCHECK_LT(0, parts_.length());
251   for (int i = 0, n = parts_.length(); i < n; i++) {
252     ReplacementPart part = parts_[i];
253     switch (part.tag) {
254       case SUBJECT_PREFIX:
255         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
256         break;
257       case SUBJECT_SUFFIX: {
258         int subject_length = part.data;
259         if (match_to < subject_length) {
260           builder->AddSubjectSlice(match_to, subject_length);
261         }
262         break;
263       }
264       case SUBJECT_CAPTURE: {
265         int capture = part.data;
266         int from = match[capture * 2];
267         int to = match[capture * 2 + 1];
268         if (from >= 0 && to > from) {
269           builder->AddSubjectSlice(from, to);
270         }
271         break;
272       }
273       case REPLACEMENT_SUBSTRING:
274       case REPLACEMENT_STRING:
275         builder->AddString(replacement_substrings_[part.data]);
276         break;
277       default:
278         UNREACHABLE();
279     }
280   }
281 }
282 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,List<int> * indices,unsigned int limit)283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284                               List<int>* indices, unsigned int limit) {
285   DCHECK(limit > 0);
286   // Collect indices of pattern in subject using memchr.
287   // Stop after finding at most limit values.
288   const uint8_t* subject_start = subject.start();
289   const uint8_t* subject_end = subject_start + subject.length();
290   const uint8_t* pos = subject_start;
291   while (limit > 0) {
292     pos = reinterpret_cast<const uint8_t*>(
293         memchr(pos, pattern, subject_end - pos));
294     if (pos == NULL) return;
295     indices->Add(static_cast<int>(pos - subject_start));
296     pos++;
297     limit--;
298   }
299 }
300 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,List<int> * indices,unsigned int limit)301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302                               List<int>* indices, unsigned int limit) {
303   DCHECK(limit > 0);
304   const uc16* subject_start = subject.start();
305   const uc16* subject_end = subject_start + subject.length();
306   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
307     if (*pos == pattern) {
308       indices->Add(static_cast<int>(pos - subject_start));
309       limit--;
310     }
311   }
312 }
313 
314 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,List<int> * indices,unsigned int limit)315 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
316                        Vector<const PatternChar> pattern, List<int>* indices,
317                        unsigned int limit) {
318   DCHECK(limit > 0);
319   // Collect indices of pattern in subject.
320   // Stop after finding at most limit values.
321   int pattern_length = pattern.length();
322   int index = 0;
323   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
324   while (limit > 0) {
325     index = search.Search(subject, index);
326     if (index < 0) return;
327     indices->Add(index);
328     index += pattern_length;
329     limit--;
330   }
331 }
332 
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,List<int> * indices,unsigned int limit)333 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
334                                String* pattern, List<int>* indices,
335                                unsigned int limit) {
336   {
337     DisallowHeapAllocation no_gc;
338     String::FlatContent subject_content = subject->GetFlatContent();
339     String::FlatContent pattern_content = pattern->GetFlatContent();
340     DCHECK(subject_content.IsFlat());
341     DCHECK(pattern_content.IsFlat());
342     if (subject_content.IsOneByte()) {
343       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
344       if (pattern_content.IsOneByte()) {
345         Vector<const uint8_t> pattern_vector =
346             pattern_content.ToOneByteVector();
347         if (pattern_vector.length() == 1) {
348           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
349                                    limit);
350         } else {
351           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
352                             limit);
353         }
354       } else {
355         FindStringIndices(isolate, subject_vector,
356                           pattern_content.ToUC16Vector(), indices, limit);
357       }
358     } else {
359       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
360       if (pattern_content.IsOneByte()) {
361         Vector<const uint8_t> pattern_vector =
362             pattern_content.ToOneByteVector();
363         if (pattern_vector.length() == 1) {
364           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
365                                    limit);
366         } else {
367           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
368                             limit);
369         }
370       } else {
371         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
372         if (pattern_vector.length() == 1) {
373           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
374                                    limit);
375         } else {
376           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
377                             limit);
378         }
379       }
380     }
381   }
382 }
383 
384 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)385 List<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
386   List<int>* list = isolate->regexp_indices();
387   list->Rewind(0);
388   return list;
389 }
390 
TruncateRegexpIndicesList(Isolate * isolate)391 void TruncateRegexpIndicesList(Isolate* isolate) {
392   // Same size as smallest zone segment, preserving behavior from the
393   // runtime zone.
394   static const int kMaxRegexpIndicesListCapacity = 8 * KB;
395   if (isolate->regexp_indices()->capacity() > kMaxRegexpIndicesListCapacity) {
396     isolate->regexp_indices()->Clear();  //  Throw away backing storage
397   }
398 }
399 }  // namespace
400 
401 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)402 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
403     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
404     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
405   DCHECK(subject->IsFlat());
406   DCHECK(replacement->IsFlat());
407 
408   List<int>* indices = GetRewoundRegexpIndicesList(isolate);
409 
410   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
411   String* pattern =
412       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
413   int subject_len = subject->length();
414   int pattern_len = pattern->length();
415   int replacement_len = replacement->length();
416 
417   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff);
418 
419   int matches = indices->length();
420   if (matches == 0) return *subject;
421 
422   // Detect integer overflow.
423   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
424                            static_cast<int64_t>(pattern_len)) *
425                               static_cast<int64_t>(matches) +
426                           static_cast<int64_t>(subject_len);
427   int result_len;
428   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
429     STATIC_ASSERT(String::kMaxLength < kMaxInt);
430     result_len = kMaxInt;  // Provoke exception.
431   } else {
432     result_len = static_cast<int>(result_len_64);
433   }
434   if (result_len == 0) {
435     return isolate->heap()->empty_string();
436   }
437 
438   int subject_pos = 0;
439   int result_pos = 0;
440 
441   MaybeHandle<SeqString> maybe_res;
442   if (ResultSeqString::kHasOneByteEncoding) {
443     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
444   } else {
445     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
446   }
447   Handle<SeqString> untyped_res;
448   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
449   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
450 
451   for (int i = 0; i < matches; i++) {
452     // Copy non-matched subject content.
453     if (subject_pos < indices->at(i)) {
454       String::WriteToFlat(*subject, result->GetChars() + result_pos,
455                           subject_pos, indices->at(i));
456       result_pos += indices->at(i) - subject_pos;
457     }
458 
459     // Replace match.
460     if (replacement_len > 0) {
461       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
462                           replacement_len);
463       result_pos += replacement_len;
464     }
465 
466     subject_pos = indices->at(i) + pattern_len;
467   }
468   // Add remaining subject content at the end.
469   if (subject_pos < subject_len) {
470     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
471                         subject_len);
472   }
473 
474   int32_t match_indices[] = {indices->at(matches - 1),
475                              indices->at(matches - 1) + pattern_len};
476   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
477 
478   TruncateRegexpIndicesList(isolate);
479 
480   return *result;
481 }
482 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)483 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
484     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
485     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
486   DCHECK(subject->IsFlat());
487   DCHECK(replacement->IsFlat());
488 
489   int capture_count = regexp->CaptureCount();
490   int subject_length = subject->length();
491 
492   // CompiledReplacement uses zone allocation.
493   Zone zone(isolate->allocator(), ZONE_NAME);
494   CompiledReplacement compiled_replacement(&zone);
495   bool simple_replace =
496       compiled_replacement.Compile(replacement, capture_count, subject_length);
497 
498   // Shortcut for simple non-regexp global replacements
499   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
500     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
501       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
502           isolate, subject, regexp, replacement, last_match_info);
503     } else {
504       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
505           isolate, subject, regexp, replacement, last_match_info);
506     }
507   }
508 
509   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
510   if (global_cache.HasException()) return isolate->heap()->exception();
511 
512   int32_t* current_match = global_cache.FetchNext();
513   if (current_match == NULL) {
514     if (global_cache.HasException()) return isolate->heap()->exception();
515     return *subject;
516   }
517 
518   // Guessing the number of parts that the final result string is built
519   // from. Global regexps can match any number of times, so we guess
520   // conservatively.
521   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
522   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
523 
524   // Number of parts added by compiled replacement plus preceeding
525   // string and possibly suffix after last match.  It is possible for
526   // all components to use two elements when encoded as two smis.
527   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
528 
529   int prev = 0;
530 
531   do {
532     builder.EnsureCapacity(parts_added_per_loop);
533 
534     int start = current_match[0];
535     int end = current_match[1];
536 
537     if (prev < start) {
538       builder.AddSubjectSlice(prev, start);
539     }
540 
541     if (simple_replace) {
542       builder.AddString(replacement);
543     } else {
544       compiled_replacement.Apply(&builder, start, end, current_match);
545     }
546     prev = end;
547 
548     current_match = global_cache.FetchNext();
549   } while (current_match != NULL);
550 
551   if (global_cache.HasException()) return isolate->heap()->exception();
552 
553   if (prev < subject_length) {
554     builder.EnsureCapacity(2);
555     builder.AddSubjectSlice(prev, subject_length);
556   }
557 
558   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
559                                global_cache.LastSuccessfulMatch());
560 
561   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
562 }
563 
564 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)565 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
566     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
567     Handle<RegExpMatchInfo> last_match_info) {
568   DCHECK(subject->IsFlat());
569 
570   // Shortcut for simple non-regexp global replacements
571   if (regexp->TypeTag() == JSRegExp::ATOM) {
572     Handle<String> empty_string = isolate->factory()->empty_string();
573     if (subject->IsOneByteRepresentation()) {
574       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
575           isolate, subject, regexp, empty_string, last_match_info);
576     } else {
577       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
578           isolate, subject, regexp, empty_string, last_match_info);
579     }
580   }
581 
582   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
583   if (global_cache.HasException()) return isolate->heap()->exception();
584 
585   int32_t* current_match = global_cache.FetchNext();
586   if (current_match == NULL) {
587     if (global_cache.HasException()) return isolate->heap()->exception();
588     return *subject;
589   }
590 
591   int start = current_match[0];
592   int end = current_match[1];
593   int capture_count = regexp->CaptureCount();
594   int subject_length = subject->length();
595 
596   int new_length = subject_length - (end - start);
597   if (new_length == 0) return isolate->heap()->empty_string();
598 
599   Handle<ResultSeqString> answer;
600   if (ResultSeqString::kHasOneByteEncoding) {
601     answer = Handle<ResultSeqString>::cast(
602         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
603   } else {
604     answer = Handle<ResultSeqString>::cast(
605         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
606   }
607 
608   int prev = 0;
609   int position = 0;
610 
611   do {
612     start = current_match[0];
613     end = current_match[1];
614     if (prev < start) {
615       // Add substring subject[prev;start] to answer string.
616       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
617       position += start - prev;
618     }
619     prev = end;
620 
621     current_match = global_cache.FetchNext();
622   } while (current_match != NULL);
623 
624   if (global_cache.HasException()) return isolate->heap()->exception();
625 
626   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
627                                global_cache.LastSuccessfulMatch());
628 
629   if (prev < subject_length) {
630     // Add substring subject[prev;length] to answer string.
631     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
632                         subject_length);
633     position += subject_length - prev;
634   }
635 
636   if (position == 0) return isolate->heap()->empty_string();
637 
638   // Shorten string and fill
639   int string_size = ResultSeqString::SizeFor(position);
640   int allocated_string_size = ResultSeqString::SizeFor(new_length);
641   int delta = allocated_string_size - string_size;
642 
643   answer->set_length(position);
644   if (delta == 0) return *answer;
645 
646   Address end_of_string = answer->address() + string_size;
647   Heap* heap = isolate->heap();
648 
649   // The trimming is performed on a newly allocated object, which is on a
650   // fresly allocated page or on an already swept page. Hence, the sweeper
651   // thread can not get confused with the filler creation. No synchronization
652   // needed.
653   // TODO(hpayer): We should shrink the large object page if the size
654   // of the object changed significantly.
655   if (!heap->lo_space()->Contains(*answer)) {
656     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
657   }
658   heap->AdjustLiveBytes(*answer, -delta);
659   return *answer;
660 }
661 
662 namespace {
663 
StringReplaceGlobalRegExpWithStringHelper(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)664 Object* StringReplaceGlobalRegExpWithStringHelper(
665     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
666     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
667   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
668 
669   subject = String::Flatten(subject);
670 
671   if (replacement->length() == 0) {
672     if (subject->HasOnlyOneByteChars()) {
673       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
674           isolate, subject, regexp, last_match_info);
675     } else {
676       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
677           isolate, subject, regexp, last_match_info);
678     }
679   }
680 
681   replacement = String::Flatten(replacement);
682 
683   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
684                                              replacement, last_match_info);
685 }
686 
687 }  // namespace
688 
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString)689 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
690   HandleScope scope(isolate);
691   DCHECK_EQ(4, args.length());
692 
693   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
694   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
695   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
696   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
697 
698   return StringReplaceGlobalRegExpWithStringHelper(
699       isolate, regexp, subject, replacement, last_match_info);
700 }
701 
RUNTIME_FUNCTION(Runtime_StringSplit)702 RUNTIME_FUNCTION(Runtime_StringSplit) {
703   HandleScope handle_scope(isolate);
704   DCHECK_EQ(3, args.length());
705   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
706   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
707   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
708   CHECK(limit > 0);
709 
710   int subject_length = subject->length();
711   int pattern_length = pattern->length();
712   CHECK(pattern_length > 0);
713 
714   if (limit == 0xffffffffu) {
715     FixedArray* last_match_cache_unused;
716     Handle<Object> cached_answer(
717         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
718                                    &last_match_cache_unused,
719                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
720         isolate);
721     if (*cached_answer != Smi::kZero) {
722       // The cache FixedArray is a COW-array and can therefore be reused.
723       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
724           Handle<FixedArray>::cast(cached_answer));
725       return *result;
726     }
727   }
728 
729   // The limit can be very large (0xffffffffu), but since the pattern
730   // isn't empty, we can never create more parts than ~half the length
731   // of the subject.
732 
733   subject = String::Flatten(subject);
734   pattern = String::Flatten(pattern);
735 
736   List<int>* indices = GetRewoundRegexpIndicesList(isolate);
737 
738   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
739 
740   if (static_cast<uint32_t>(indices->length()) < limit) {
741     indices->Add(subject_length);
742   }
743 
744   // The list indices now contains the end of each part to create.
745 
746   // Create JSArray of substrings separated by separator.
747   int part_count = indices->length();
748 
749   Handle<JSArray> result =
750       isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
751                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
752 
753   DCHECK(result->HasFastObjectElements());
754 
755   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
756 
757   if (part_count == 1 && indices->at(0) == subject_length) {
758     elements->set(0, *subject);
759   } else {
760     int part_start = 0;
761     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
762       int part_end = indices->at(i);
763       Handle<String> substring =
764           isolate->factory()->NewProperSubString(subject, part_start, part_end);
765       elements->set(i, *substring);
766       part_start = part_end + pattern_length;
767     });
768   }
769 
770   if (limit == 0xffffffffu) {
771     if (result->HasFastObjectElements()) {
772       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
773                                 isolate->factory()->empty_fixed_array(),
774                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
775     }
776   }
777 
778   TruncateRegexpIndicesList(isolate);
779 
780   return *result;
781 }
782 
783 // ES##sec-regexpcreate
784 // RegExpCreate ( P, F )
RUNTIME_FUNCTION(Runtime_RegExpCreate)785 RUNTIME_FUNCTION(Runtime_RegExpCreate) {
786   HandleScope scope(isolate);
787   DCHECK_EQ(1, args.length());
788   CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0);
789 
790   Handle<String> source;
791   if (source_object->IsUndefined(isolate)) {
792     source = isolate->factory()->empty_string();
793   } else {
794     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
795         isolate, source, Object::ToString(isolate, source_object));
796   }
797 
798   Handle<Map> map(isolate->regexp_function()->initial_map());
799   Handle<JSRegExp> regexp =
800       Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map));
801 
802   JSRegExp::Flags flags = JSRegExp::kNone;
803 
804   RETURN_FAILURE_ON_EXCEPTION(isolate,
805                               JSRegExp::Initialize(regexp, source, flags));
806 
807   return *regexp;
808 }
809 
RUNTIME_FUNCTION(Runtime_RegExpExec)810 RUNTIME_FUNCTION(Runtime_RegExpExec) {
811   HandleScope scope(isolate);
812   DCHECK_EQ(4, args.length());
813   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
814   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
815   CONVERT_INT32_ARG_CHECKED(index, 2);
816   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
817   // Due to the way the JS calls are constructed this must be less than the
818   // length of a string, i.e. it is always a Smi.  We check anyway for security.
819   CHECK(index >= 0);
820   CHECK(index <= subject->length());
821   isolate->counters()->regexp_entry_runtime()->Increment();
822   RETURN_RESULT_OR_FAILURE(
823       isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
824 }
825 
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace)826 RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
827   HandleScope scope(isolate);
828   DCHECK_EQ(3, args.length());
829   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
830   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
831   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
832 
833   Handle<RegExpMatchInfo> internal_match_info =
834       isolate->regexp_internal_match_info();
835 
836   return StringReplaceGlobalRegExpWithStringHelper(
837       isolate, regexp, subject, replacement, internal_match_info);
838 }
839 
840 namespace {
841 
842 class MatchInfoBackedMatch : public String::Match {
843  public:
MatchInfoBackedMatch(Isolate * isolate,Handle<String> subject,Handle<RegExpMatchInfo> match_info)844   MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
845                        Handle<RegExpMatchInfo> match_info)
846       : isolate_(isolate), match_info_(match_info) {
847     subject_ = String::Flatten(subject);
848   }
849 
GetMatch()850   Handle<String> GetMatch() override {
851     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
852   }
853 
GetCapture(int i,bool * capture_exists)854   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
855     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
856         isolate_, match_info_, i, capture_exists);
857     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
858                              : isolate_->factory()->empty_string();
859   }
860 
GetPrefix()861   Handle<String> GetPrefix() override {
862     const int match_start = match_info_->Capture(0);
863     return isolate_->factory()->NewSubString(subject_, 0, match_start);
864   }
865 
GetSuffix()866   Handle<String> GetSuffix() override {
867     const int match_end = match_info_->Capture(1);
868     return isolate_->factory()->NewSubString(subject_, match_end,
869                                              subject_->length());
870   }
871 
CaptureCount()872   int CaptureCount() override {
873     return match_info_->NumberOfCaptureRegisters() / 2;
874   }
875 
~MatchInfoBackedMatch()876   virtual ~MatchInfoBackedMatch() {}
877 
878  private:
879   Isolate* isolate_;
880   Handle<String> subject_;
881   Handle<RegExpMatchInfo> match_info_;
882 };
883 
884 class VectorBackedMatch : public String::Match {
885  public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures)886   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
887                     Handle<String> match, int match_position,
888                     ZoneVector<Handle<Object>>* captures)
889       : isolate_(isolate),
890         match_(match),
891         match_position_(match_position),
892         captures_(captures) {
893     subject_ = String::Flatten(subject);
894   }
895 
GetMatch()896   Handle<String> GetMatch() override { return match_; }
897 
GetCapture(int i,bool * capture_exists)898   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
899     Handle<Object> capture_obj = captures_->at(i);
900     if (capture_obj->IsUndefined(isolate_)) {
901       *capture_exists = false;
902       return isolate_->factory()->empty_string();
903     }
904     *capture_exists = true;
905     return Object::ToString(isolate_, capture_obj);
906   }
907 
GetPrefix()908   Handle<String> GetPrefix() override {
909     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
910   }
911 
GetSuffix()912   Handle<String> GetSuffix() override {
913     const int match_end_position = match_position_ + match_->length();
914     return isolate_->factory()->NewSubString(subject_, match_end_position,
915                                              subject_->length());
916   }
917 
CaptureCount()918   int CaptureCount() override { return static_cast<int>(captures_->size()); }
919 
~VectorBackedMatch()920   virtual ~VectorBackedMatch() {}
921 
922  private:
923   Isolate* isolate_;
924   Handle<String> subject_;
925   Handle<String> match_;
926   const int match_position_;
927   ZoneVector<Handle<Object>>* captures_;
928 };
929 
930 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
931 // separate last match info.  See comment on that function.
932 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)933 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
934                                     Handle<JSRegExp> regexp,
935                                     Handle<RegExpMatchInfo> last_match_array,
936                                     Handle<JSArray> result_array) {
937   DCHECK(subject->IsFlat());
938   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
939 
940   int capture_count = regexp->CaptureCount();
941   int subject_length = subject->length();
942 
943   static const int kMinLengthToCache = 0x1000;
944 
945   if (subject_length > kMinLengthToCache) {
946     FixedArray* last_match_cache;
947     Object* cached_answer = RegExpResultsCache::Lookup(
948         isolate->heap(), *subject, regexp->data(), &last_match_cache,
949         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
950     if (cached_answer->IsFixedArray()) {
951       int capture_registers = (capture_count + 1) * 2;
952       int32_t* last_match = NewArray<int32_t>(capture_registers);
953       for (int i = 0; i < capture_registers; i++) {
954         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
955       }
956       Handle<FixedArray> cached_fixed_array =
957           Handle<FixedArray>(FixedArray::cast(cached_answer));
958       // The cache FixedArray is a COW-array and we need to return a copy.
959       Handle<FixedArray> copied_fixed_array =
960           isolate->factory()->CopyFixedArrayWithMap(
961               cached_fixed_array, isolate->factory()->fixed_array_map());
962       JSArray::SetContent(result_array, copied_fixed_array);
963       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
964                                    last_match);
965       DeleteArray(last_match);
966       return *result_array;
967     }
968   }
969 
970   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
971   if (global_cache.HasException()) return isolate->heap()->exception();
972 
973   // Ensured in Runtime_RegExpExecMultiple.
974   DCHECK(result_array->HasFastObjectElements());
975   Handle<FixedArray> result_elements(
976       FixedArray::cast(result_array->elements()));
977   if (result_elements->length() < 16) {
978     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
979   }
980 
981   FixedArrayBuilder builder(result_elements);
982 
983   // Position to search from.
984   int match_start = -1;
985   int match_end = 0;
986   bool first = true;
987 
988   // Two smis before and after the match, for very long strings.
989   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
990 
991   while (true) {
992     int32_t* current_match = global_cache.FetchNext();
993     if (current_match == NULL) break;
994     match_start = current_match[0];
995     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
996     if (match_end < match_start) {
997       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
998                                                 match_start);
999     }
1000     match_end = current_match[1];
1001     {
1002       // Avoid accumulating new handles inside loop.
1003       HandleScope temp_scope(isolate);
1004       Handle<String> match;
1005       if (!first) {
1006         match = isolate->factory()->NewProperSubString(subject, match_start,
1007                                                        match_end);
1008       } else {
1009         match =
1010             isolate->factory()->NewSubString(subject, match_start, match_end);
1011         first = false;
1012       }
1013 
1014       if (has_capture) {
1015         // Arguments array to replace function is match, captures, index and
1016         // subject, i.e., 3 + capture count in total.
1017         Handle<FixedArray> elements =
1018             isolate->factory()->NewFixedArray(3 + capture_count);
1019 
1020         elements->set(0, *match);
1021         for (int i = 1; i <= capture_count; i++) {
1022           int start = current_match[i * 2];
1023           if (start >= 0) {
1024             int end = current_match[i * 2 + 1];
1025             DCHECK(start <= end);
1026             Handle<String> substring =
1027                 isolate->factory()->NewSubString(subject, start, end);
1028             elements->set(i, *substring);
1029           } else {
1030             DCHECK(current_match[i * 2 + 1] < 0);
1031             elements->set(i, isolate->heap()->undefined_value());
1032           }
1033         }
1034         elements->set(capture_count + 1, Smi::FromInt(match_start));
1035         elements->set(capture_count + 2, *subject);
1036         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1037       } else {
1038         builder.Add(*match);
1039       }
1040     }
1041   }
1042 
1043   if (global_cache.HasException()) return isolate->heap()->exception();
1044 
1045   if (match_start >= 0) {
1046     // Finished matching, with at least one match.
1047     if (match_end < subject_length) {
1048       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1049                                                 subject_length);
1050     }
1051 
1052     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1053                                  global_cache.LastSuccessfulMatch());
1054 
1055     if (subject_length > kMinLengthToCache) {
1056       // Store the last successful match into the array for caching.
1057       // TODO(yangguo): do not expose last match to JS and simplify caching.
1058       int capture_registers = (capture_count + 1) * 2;
1059       Handle<FixedArray> last_match_cache =
1060           isolate->factory()->NewFixedArray(capture_registers);
1061       int32_t* last_match = global_cache.LastSuccessfulMatch();
1062       for (int i = 0; i < capture_registers; i++) {
1063         last_match_cache->set(i, Smi::FromInt(last_match[i]));
1064       }
1065       Handle<FixedArray> result_fixed_array = builder.array();
1066       result_fixed_array->Shrink(builder.length());
1067       // Cache the result and copy the FixedArray into a COW array.
1068       Handle<FixedArray> copied_fixed_array =
1069           isolate->factory()->CopyFixedArrayWithMap(
1070               result_fixed_array, isolate->factory()->fixed_array_map());
1071       RegExpResultsCache::Enter(
1072           isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1073           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1074     }
1075     return *builder.ToJSArray(result_array);
1076   } else {
1077     return isolate->heap()->null_value();  // No matches at all.
1078   }
1079 }
1080 
StringReplaceNonGlobalRegExpWithFunction(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<Object> replace_obj)1081 MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
1082     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
1083     Handle<Object> replace_obj) {
1084   Factory* factory = isolate->factory();
1085   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1086 
1087   const int flags = regexp->GetFlags();
1088 
1089   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1090   DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1091 
1092   // TODO(jgruber): This should be an easy port to CSA with massive payback.
1093 
1094   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1095   uint32_t last_index = 0;
1096   if (sticky) {
1097     Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1098     ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1099                                Object::ToLength(isolate, last_index_obj),
1100                                String);
1101     last_index = PositiveNumberToUint32(*last_index_obj);
1102 
1103     if (static_cast<int>(last_index) > subject->length()) last_index = 0;
1104   }
1105 
1106   Handle<Object> match_indices_obj;
1107   ASSIGN_RETURN_ON_EXCEPTION(
1108       isolate, match_indices_obj,
1109       RegExpImpl::Exec(regexp, subject, last_index, last_match_info), String);
1110 
1111   if (match_indices_obj->IsNull(isolate)) {
1112     if (sticky) regexp->SetLastIndex(0);
1113     return subject;
1114   }
1115 
1116   Handle<RegExpMatchInfo> match_indices =
1117       Handle<RegExpMatchInfo>::cast(match_indices_obj);
1118 
1119   const int index = match_indices->Capture(0);
1120   const int end_of_match = match_indices->Capture(1);
1121 
1122   if (sticky) regexp->SetLastIndex(end_of_match);
1123 
1124   IncrementalStringBuilder builder(isolate);
1125   builder.AppendString(factory->NewSubString(subject, 0, index));
1126 
1127   // Compute the parameter list consisting of the match, captures, index,
1128   // and subject for the replace function invocation.
1129   // The number of captures plus one for the match.
1130   const int m = match_indices->NumberOfCaptureRegisters() / 2;
1131 
1132   const int argc = m + 2;
1133   ScopedVector<Handle<Object>> argv(argc);
1134 
1135   for (int j = 0; j < m; j++) {
1136     bool ok;
1137     Handle<String> capture =
1138         RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1139     if (ok) {
1140       argv[j] = capture;
1141     } else {
1142       argv[j] = factory->undefined_value();
1143     }
1144   }
1145 
1146   argv[argc - 2] = handle(Smi::FromInt(index), isolate);
1147   argv[argc - 1] = subject;
1148 
1149   Handle<Object> replacement_obj;
1150   ASSIGN_RETURN_ON_EXCEPTION(
1151       isolate, replacement_obj,
1152       Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1153                       argv.start()),
1154       String);
1155 
1156   Handle<String> replacement;
1157   ASSIGN_RETURN_ON_EXCEPTION(
1158       isolate, replacement, Object::ToString(isolate, replacement_obj), String);
1159 
1160   builder.AppendString(replacement);
1161   builder.AppendString(
1162       factory->NewSubString(subject, end_of_match, subject->length()));
1163 
1164   return builder.Finish();
1165 }
1166 
1167 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1168 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace)1169 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1170                                                   Handle<JSRegExp> regexp,
1171                                                   Handle<String> string,
1172                                                   Handle<String> replace) {
1173   Factory* factory = isolate->factory();
1174 
1175   const int flags = regexp->GetFlags();
1176   const bool global = (flags & JSRegExp::kGlobal) != 0;
1177   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1178 
1179   replace = String::Flatten(replace);
1180 
1181   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1182 
1183   if (!global) {
1184     // Non-global regexp search, string replace.
1185 
1186     uint32_t last_index = 0;
1187     if (sticky) {
1188       Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1189       ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1190                                  Object::ToLength(isolate, last_index_obj),
1191                                  String);
1192       last_index = PositiveNumberToUint32(*last_index_obj);
1193 
1194       if (static_cast<int>(last_index) > string->length()) last_index = 0;
1195     }
1196 
1197     Handle<Object> match_indices_obj;
1198     ASSIGN_RETURN_ON_EXCEPTION(
1199         isolate, match_indices_obj,
1200         RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
1201 
1202     if (match_indices_obj->IsNull(isolate)) {
1203       if (sticky) regexp->SetLastIndex(0);
1204       return string;
1205     }
1206 
1207     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1208 
1209     const int start_index = match_indices->Capture(0);
1210     const int end_index = match_indices->Capture(1);
1211 
1212     if (sticky) regexp->SetLastIndex(end_index);
1213 
1214     IncrementalStringBuilder builder(isolate);
1215     builder.AppendString(factory->NewSubString(string, 0, start_index));
1216 
1217     if (replace->length() > 0) {
1218       MatchInfoBackedMatch m(isolate, string, match_indices);
1219       Handle<String> replacement;
1220       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1221                                  String::GetSubstitution(isolate, &m, replace),
1222                                  String);
1223       builder.AppendString(replacement);
1224     }
1225 
1226     builder.AppendString(
1227         factory->NewSubString(string, end_index, string->length()));
1228     return builder.Finish();
1229   } else {
1230     // Global regexp search, string replace.
1231     DCHECK(global);
1232     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1233                         String);
1234 
1235     if (replace->length() == 0) {
1236       if (string->HasOnlyOneByteChars()) {
1237         Object* result =
1238             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1239                 isolate, string, regexp, last_match_info);
1240         return handle(String::cast(result), isolate);
1241       } else {
1242         Object* result =
1243             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1244                 isolate, string, regexp, last_match_info);
1245         return handle(String::cast(result), isolate);
1246       }
1247     }
1248 
1249     Object* result = StringReplaceGlobalRegExpWithString(
1250         isolate, string, regexp, replace, last_match_info);
1251     if (result->IsString()) {
1252       return handle(String::cast(result), isolate);
1253     } else {
1254       return MaybeHandle<String>();
1255     }
1256   }
1257 
1258   UNREACHABLE();
1259   return MaybeHandle<String>();
1260 }
1261 
1262 }  // namespace
1263 
1264 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1265 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1266   HandleScope handles(isolate);
1267   DCHECK_EQ(4, args.length());
1268 
1269   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1270   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1271   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1272   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1273 
1274   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1275   CHECK(result_array->HasFastObjectElements());
1276 
1277   subject = String::Flatten(subject);
1278   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1279 
1280   Object* result;
1281   if (regexp->CaptureCount() == 0) {
1282     result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1283                                          last_match_info, result_array);
1284   } else {
1285     result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1286                                         last_match_info, result_array);
1287   }
1288   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1289   return result;
1290 }
1291 
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1292 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1293   HandleScope scope(isolate);
1294   DCHECK_EQ(3, args.length());
1295 
1296   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1297   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1298   CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2);
1299 
1300   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1301 
1302   RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction(
1303                                         isolate, subject, regexp, replace));
1304 }
1305 
1306 namespace {
1307 
1308 // ES##sec-speciesconstructor
1309 // SpeciesConstructor ( O, defaultConstructor )
SpeciesConstructor(Isolate * isolate,Handle<JSReceiver> recv,Handle<JSFunction> default_ctor)1310 MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor(
1311     Isolate* isolate, Handle<JSReceiver> recv,
1312     Handle<JSFunction> default_ctor) {
1313   Handle<Object> ctor_obj;
1314   ASSIGN_RETURN_ON_EXCEPTION(
1315       isolate, ctor_obj,
1316       JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
1317       Object);
1318 
1319   if (ctor_obj->IsUndefined(isolate)) return default_ctor;
1320 
1321   if (!ctor_obj->IsJSReceiver()) {
1322     THROW_NEW_ERROR(isolate,
1323                     NewTypeError(MessageTemplate::kConstructorNotReceiver),
1324                     Object);
1325   }
1326 
1327   Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj);
1328 
1329   Handle<Object> species;
1330   ASSIGN_RETURN_ON_EXCEPTION(
1331       isolate, species,
1332       JSObject::GetProperty(ctor, isolate->factory()->species_symbol()),
1333       Object);
1334 
1335   if (species->IsNullOrUndefined(isolate)) {
1336     return default_ctor;
1337   }
1338 
1339   if (species->IsConstructor()) return species;
1340 
1341   THROW_NEW_ERROR(
1342       isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
1343 }
1344 
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1345 MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1346                                              Handle<Object> object,
1347                                              uint32_t* out) {
1348   if (object->IsUndefined(isolate)) {
1349     *out = kMaxUInt32;
1350     return object;
1351   }
1352 
1353   Handle<Object> number;
1354   ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1355   *out = NumberToUint32(*number);
1356   return object;
1357 }
1358 
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1359 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1360                                        Handle<FixedArray> elems,
1361                                        int num_elems) {
1362   elems->Shrink(num_elems);
1363   return isolate->factory()->NewJSArrayWithElements(elems);
1364 }
1365 
1366 }  // namespace
1367 
1368 // Slow path for:
1369 // ES#sec-regexp.prototype-@@replace
1370 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1371 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1372   HandleScope scope(isolate);
1373   DCHECK_EQ(3, args.length());
1374 
1375   DCHECK(args[1]->IsString());
1376 
1377   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1378   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1379   CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1380 
1381   Factory* factory = isolate->factory();
1382 
1383   Handle<JSFunction> regexp_fun = isolate->regexp_function();
1384   Handle<Object> ctor;
1385   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1386       isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
1387 
1388   Handle<Object> flags_obj;
1389   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1390       isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1391 
1392   Handle<String> flags;
1393   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1394                                      Object::ToString(isolate, flags_obj));
1395 
1396   Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1397   const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1398 
1399   Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1400   const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1401 
1402   Handle<String> new_flags = flags;
1403   if (!sticky) {
1404     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1405                                        factory->NewConsString(flags, y_str));
1406   }
1407 
1408   Handle<JSReceiver> splitter;
1409   {
1410     const int argc = 2;
1411 
1412     ScopedVector<Handle<Object>> argv(argc);
1413     argv[0] = recv;
1414     argv[1] = new_flags;
1415 
1416     Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
1417     Handle<Object> splitter_obj;
1418     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1419         isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
1420 
1421     splitter = Handle<JSReceiver>::cast(splitter_obj);
1422   }
1423 
1424   uint32_t limit;
1425   RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1426 
1427   const uint32_t length = string->length();
1428 
1429   if (limit == 0) return *factory->NewJSArray(0);
1430 
1431   if (length == 0) {
1432     Handle<Object> result;
1433     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1434         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1435                                                  factory->undefined_value()));
1436 
1437     if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1438 
1439     Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1440     elems->set(0, *string);
1441     return *factory->NewJSArrayWithElements(elems);
1442   }
1443 
1444   static const int kInitialArraySize = 8;
1445   Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1446   int num_elems = 0;
1447 
1448   uint32_t string_index = 0;
1449   uint32_t prev_string_index = 0;
1450   while (string_index < length) {
1451     RETURN_FAILURE_ON_EXCEPTION(
1452         isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1453 
1454     Handle<Object> result;
1455     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1456         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1457                                                  factory->undefined_value()));
1458 
1459     if (result->IsNull(isolate)) {
1460       string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1461                                                      string_index, unicode);
1462       continue;
1463     }
1464 
1465     Handle<Object> last_index_obj;
1466     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1467         isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1468 
1469     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1470         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1471 
1472     const uint32_t end =
1473         std::min(PositiveNumberToUint32(*last_index_obj), length);
1474     if (end == prev_string_index) {
1475       string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1476                                                      string_index, unicode);
1477       continue;
1478     }
1479 
1480     {
1481       Handle<String> substr =
1482           factory->NewSubString(string, prev_string_index, string_index);
1483       elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1484       if (static_cast<uint32_t>(num_elems) == limit) {
1485         return *NewJSArrayWithElements(isolate, elems, num_elems);
1486       }
1487     }
1488 
1489     prev_string_index = end;
1490 
1491     Handle<Object> num_captures_obj;
1492     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1493         isolate, num_captures_obj,
1494         Object::GetProperty(result, isolate->factory()->length_string()));
1495 
1496     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1497         isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1498     const int num_captures = PositiveNumberToUint32(*num_captures_obj);
1499 
1500     for (int i = 1; i < num_captures; i++) {
1501       Handle<Object> capture;
1502       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1503           isolate, capture, Object::GetElement(isolate, result, i));
1504       elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1505       if (static_cast<uint32_t>(num_elems) == limit) {
1506         return *NewJSArrayWithElements(isolate, elems, num_elems);
1507       }
1508     }
1509 
1510     string_index = prev_string_index;
1511   }
1512 
1513   {
1514     Handle<String> substr =
1515         factory->NewSubString(string, prev_string_index, length);
1516     elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1517   }
1518 
1519   return *NewJSArrayWithElements(isolate, elems, num_elems);
1520 }
1521 
1522 // Slow path for:
1523 // ES#sec-regexp.prototype-@@replace
1524 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplace)1525 RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1526   HandleScope scope(isolate);
1527   DCHECK_EQ(3, args.length());
1528 
1529   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1530   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1531   Handle<Object> replace_obj = args.at(2);
1532 
1533   Factory* factory = isolate->factory();
1534 
1535   string = String::Flatten(string);
1536 
1537   const bool functional_replace = replace_obj->IsCallable();
1538 
1539   Handle<String> replace;
1540   if (!functional_replace) {
1541     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1542                                        Object::ToString(isolate, replace_obj));
1543   }
1544 
1545   // Fast-path for unmodified JSRegExps (and non-functional replace).
1546   if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1547     // We should never get here with functional replace because unmodified
1548     // regexp and functional replace should be fully handled in CSA code.
1549     CHECK(!functional_replace);
1550     Handle<Object> result;
1551     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1552         isolate, result,
1553         RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1554     DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1555     return *result;
1556   }
1557 
1558   const uint32_t length = string->length();
1559 
1560   Handle<Object> global_obj;
1561   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1562       isolate, global_obj,
1563       JSReceiver::GetProperty(recv, factory->global_string()));
1564   const bool global = global_obj->BooleanValue();
1565 
1566   bool unicode = false;
1567   if (global) {
1568     Handle<Object> unicode_obj;
1569     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1570         isolate, unicode_obj,
1571         JSReceiver::GetProperty(recv, factory->unicode_string()));
1572     unicode = unicode_obj->BooleanValue();
1573 
1574     RETURN_FAILURE_ON_EXCEPTION(isolate,
1575                                 RegExpUtils::SetLastIndex(isolate, recv, 0));
1576   }
1577 
1578   Zone zone(isolate->allocator(), ZONE_NAME);
1579   ZoneVector<Handle<Object>> results(&zone);
1580 
1581   while (true) {
1582     Handle<Object> result;
1583     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1584         isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1585                                                  factory->undefined_value()));
1586 
1587     if (result->IsNull(isolate)) break;
1588 
1589     results.push_back(result);
1590     if (!global) break;
1591 
1592     Handle<Object> match_obj;
1593     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1594                                        Object::GetElement(isolate, result, 0));
1595 
1596     Handle<String> match;
1597     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1598                                        Object::ToString(isolate, match_obj));
1599 
1600     if (match->length() == 0) {
1601       RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1602                                                isolate, recv, string, unicode));
1603     }
1604   }
1605 
1606   // TODO(jgruber): Look into ReplacementStringBuilder instead.
1607   IncrementalStringBuilder builder(isolate);
1608   uint32_t next_source_position = 0;
1609 
1610   for (const auto& result : results) {
1611     Handle<Object> captures_length_obj;
1612     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1613         isolate, captures_length_obj,
1614         Object::GetProperty(result, factory->length_string()));
1615 
1616     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1617         isolate, captures_length_obj,
1618         Object::ToLength(isolate, captures_length_obj));
1619     const int captures_length = PositiveNumberToUint32(*captures_length_obj);
1620 
1621     Handle<Object> match_obj;
1622     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1623                                        Object::GetElement(isolate, result, 0));
1624 
1625     Handle<String> match;
1626     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1627                                        Object::ToString(isolate, match_obj));
1628 
1629     const int match_length = match->length();
1630 
1631     Handle<Object> position_obj;
1632     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1633         isolate, position_obj,
1634         Object::GetProperty(result, factory->index_string()));
1635 
1636     // TODO(jgruber): Extract and correct error handling. Since we can go up to
1637     // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
1638     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1639         isolate, position_obj, Object::ToInteger(isolate, position_obj));
1640     const uint32_t position =
1641         std::min(PositiveNumberToUint32(*position_obj), length);
1642 
1643     ZoneVector<Handle<Object>> captures(&zone);
1644     for (int n = 0; n < captures_length; n++) {
1645       Handle<Object> capture;
1646       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1647           isolate, capture, Object::GetElement(isolate, result, n));
1648 
1649       if (!capture->IsUndefined(isolate)) {
1650         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1651                                            Object::ToString(isolate, capture));
1652       }
1653       captures.push_back(capture);
1654     }
1655 
1656     Handle<String> replacement;
1657     if (functional_replace) {
1658       const int argc = captures_length + 2;
1659       ScopedVector<Handle<Object>> argv(argc);
1660 
1661       for (int j = 0; j < captures_length; j++) {
1662         argv[j] = captures[j];
1663       }
1664 
1665       argv[captures_length] = handle(Smi::FromInt(position), isolate);
1666       argv[captures_length + 1] = string;
1667 
1668       Handle<Object> replacement_obj;
1669       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1670           isolate, replacement_obj,
1671           Execution::Call(isolate, replace_obj, factory->undefined_value(),
1672                           argc, argv.start()));
1673 
1674       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1675           isolate, replacement, Object::ToString(isolate, replacement_obj));
1676     } else {
1677       VectorBackedMatch m(isolate, string, match, position, &captures);
1678       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1679           isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1680     }
1681 
1682     if (position >= next_source_position) {
1683       builder.AppendString(
1684           factory->NewSubString(string, next_source_position, position));
1685       builder.AppendString(replacement);
1686 
1687       next_source_position = position + match_length;
1688     }
1689   }
1690 
1691   if (next_source_position < length) {
1692     builder.AppendString(
1693         factory->NewSubString(string, next_source_position, length));
1694   }
1695 
1696   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1697 }
1698 
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow)1699 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1700   SealHandleScope shs(isolate);
1701   DCHECK_EQ(4, args.length());
1702   Object* exception = isolate->pending_exception();
1703   isolate->clear_pending_exception();
1704   return isolate->ReThrow(exception);
1705 }
1706 
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1707 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1708   HandleScope scope(isolate);
1709   DCHECK_EQ(3, args.length());
1710   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1711   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1712   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1713 
1714   RETURN_FAILURE_ON_EXCEPTION(isolate,
1715                               JSRegExp::Initialize(regexp, source, flags));
1716 
1717   return *regexp;
1718 }
1719 
RUNTIME_FUNCTION(Runtime_IsRegExp)1720 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1721   SealHandleScope shs(isolate);
1722   DCHECK_EQ(1, args.length());
1723   CONVERT_ARG_CHECKED(Object, obj, 0);
1724   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1725 }
1726 
1727 }  // namespace internal
1728 }  // namespace v8
1729