• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/runtime/runtime-utils.h"
6 
7 #include "src/arguments.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h"
13 #include "src/string-builder.h"
14 #include "src/string-search.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 class CompiledReplacement {
20  public:
CompiledReplacement(Zone * zone)21   explicit CompiledReplacement(Zone* zone)
22       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
23 
24   // Return whether the replacement is simple.
25   bool Compile(Handle<String> replacement, int capture_count,
26                int subject_length);
27 
28   // Use Apply only if Compile returned false.
29   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30              int32_t* match);
31 
32   // Number of distinct parts of the replacement pattern.
parts()33   int parts() { return parts_.length(); }
34 
zone() const35   Zone* zone() const { return zone_; }
36 
37  private:
38   enum PartType {
39     SUBJECT_PREFIX = 1,
40     SUBJECT_SUFFIX,
41     SUBJECT_CAPTURE,
42     REPLACEMENT_SUBSTRING,
43     REPLACEMENT_STRING,
44     NUMBER_OF_PART_TYPES
45   };
46 
47   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart48     static inline ReplacementPart SubjectMatch() {
49       return ReplacementPart(SUBJECT_CAPTURE, 0);
50     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart51     static inline ReplacementPart SubjectCapture(int capture_index) {
52       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
53     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart54     static inline ReplacementPart SubjectPrefix() {
55       return ReplacementPart(SUBJECT_PREFIX, 0);
56     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart57     static inline ReplacementPart SubjectSuffix(int subject_length) {
58       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
59     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart60     static inline ReplacementPart ReplacementString() {
61       return ReplacementPart(REPLACEMENT_STRING, 0);
62     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart63     static inline ReplacementPart ReplacementSubString(int from, int to) {
64       DCHECK(from >= 0);
65       DCHECK(to > from);
66       return ReplacementPart(-from, to);
67     }
68 
69     // If tag <= 0 then it is the negation of a start index of a substring of
70     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart71     ReplacementPart(int tag, int data) : tag(tag), data(data) {
72       // Must be non-positive or a PartType value.
73       DCHECK(tag < NUMBER_OF_PART_TYPES);
74     }
75     // Either a value of PartType or a non-positive number that is
76     // the negation of an index into the replacement string.
77     int tag;
78     // The data value's interpretation depends on the value of tag:
79     // tag == SUBJECT_PREFIX ||
80     // tag == SUBJECT_SUFFIX:  data is unused.
81     // tag == SUBJECT_CAPTURE: data is the number of the capture.
82     // tag == REPLACEMENT_SUBSTRING ||
83     // tag == REPLACEMENT_STRING:    data is index into array of substrings
84     //                               of the replacement string.
85     // tag <= 0: Temporary representation of the substring of the replacement
86     //           string ranging over -tag .. data.
87     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
88     //           substring objects.
89     int data;
90   };
91 
92   template <typename Char>
ParseReplacementPattern(ZoneList<ReplacementPart> * parts,Vector<Char> characters,int capture_count,int subject_length,Zone * zone)93   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
94                                Vector<Char> characters, int capture_count,
95                                int subject_length, Zone* zone) {
96     int length = characters.length();
97     int last = 0;
98     for (int i = 0; i < length; i++) {
99       Char c = characters[i];
100       if (c == '$') {
101         int next_index = i + 1;
102         if (next_index == length) {  // No next character!
103           break;
104         }
105         Char c2 = characters[next_index];
106         switch (c2) {
107           case '$':
108             if (i > last) {
109               // There is a substring before. Include the first "$".
110               parts->Add(
111                   ReplacementPart::ReplacementSubString(last, next_index),
112                   zone);
113               last = next_index + 1;  // Continue after the second "$".
114             } else {
115               // Let the next substring start with the second "$".
116               last = next_index;
117             }
118             i = next_index;
119             break;
120           case '`':
121             if (i > last) {
122               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
123             }
124             parts->Add(ReplacementPart::SubjectPrefix(), zone);
125             i = next_index;
126             last = i + 1;
127             break;
128           case '\'':
129             if (i > last) {
130               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
131             }
132             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
133             i = next_index;
134             last = i + 1;
135             break;
136           case '&':
137             if (i > last) {
138               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
139             }
140             parts->Add(ReplacementPart::SubjectMatch(), zone);
141             i = next_index;
142             last = i + 1;
143             break;
144           case '0':
145           case '1':
146           case '2':
147           case '3':
148           case '4':
149           case '5':
150           case '6':
151           case '7':
152           case '8':
153           case '9': {
154             int capture_ref = c2 - '0';
155             if (capture_ref > capture_count) {
156               i = next_index;
157               continue;
158             }
159             int second_digit_index = next_index + 1;
160             if (second_digit_index < length) {
161               // Peek ahead to see if we have two digits.
162               Char c3 = characters[second_digit_index];
163               if ('0' <= c3 && c3 <= '9') {  // Double digits.
164                 int double_digit_ref = capture_ref * 10 + c3 - '0';
165                 if (double_digit_ref <= capture_count) {
166                   next_index = second_digit_index;
167                   capture_ref = double_digit_ref;
168                 }
169               }
170             }
171             if (capture_ref > 0) {
172               if (i > last) {
173                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
174                            zone);
175               }
176               DCHECK(capture_ref <= capture_count);
177               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
178               last = next_index + 1;
179             }
180             i = next_index;
181             break;
182           }
183           default:
184             i = next_index;
185             break;
186         }
187       }
188     }
189     if (length > last) {
190       if (last == 0) {
191         // Replacement is simple.  Do not use Apply to do the replacement.
192         return true;
193       } else {
194         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
195       }
196     }
197     return false;
198   }
199 
200   ZoneList<ReplacementPart> parts_;
201   ZoneList<Handle<String> > replacement_substrings_;
202   Zone* zone_;
203 };
204 
205 
Compile(Handle<String> replacement,int capture_count,int subject_length)206 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
207                                   int subject_length) {
208   {
209     DisallowHeapAllocation no_gc;
210     String::FlatContent content = replacement->GetFlatContent();
211     DCHECK(content.IsFlat());
212     bool simple = false;
213     if (content.IsOneByte()) {
214       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
215                                        capture_count, subject_length, zone());
216     } else {
217       DCHECK(content.IsTwoByte());
218       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
219                                        capture_count, subject_length, zone());
220     }
221     if (simple) return true;
222   }
223 
224   Isolate* isolate = replacement->GetIsolate();
225   // Find substrings of replacement string and create them as String objects.
226   int substring_index = 0;
227   for (int i = 0, n = parts_.length(); i < n; i++) {
228     int tag = parts_[i].tag;
229     if (tag <= 0) {  // A replacement string slice.
230       int from = -tag;
231       int to = parts_[i].data;
232       replacement_substrings_.Add(
233           isolate->factory()->NewSubString(replacement, from, to), zone());
234       parts_[i].tag = REPLACEMENT_SUBSTRING;
235       parts_[i].data = substring_index;
236       substring_index++;
237     } else if (tag == REPLACEMENT_STRING) {
238       replacement_substrings_.Add(replacement, zone());
239       parts_[i].data = substring_index;
240       substring_index++;
241     }
242   }
243   return false;
244 }
245 
246 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)247 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
248                                 int match_from, int match_to, int32_t* match) {
249   DCHECK_LT(0, parts_.length());
250   for (int i = 0, n = parts_.length(); i < n; i++) {
251     ReplacementPart part = parts_[i];
252     switch (part.tag) {
253       case SUBJECT_PREFIX:
254         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
255         break;
256       case SUBJECT_SUFFIX: {
257         int subject_length = part.data;
258         if (match_to < subject_length) {
259           builder->AddSubjectSlice(match_to, subject_length);
260         }
261         break;
262       }
263       case SUBJECT_CAPTURE: {
264         int capture = part.data;
265         int from = match[capture * 2];
266         int to = match[capture * 2 + 1];
267         if (from >= 0 && to > from) {
268           builder->AddSubjectSlice(from, to);
269         }
270         break;
271       }
272       case REPLACEMENT_SUBSTRING:
273       case REPLACEMENT_STRING:
274         builder->AddString(replacement_substrings_[part.data]);
275         break;
276       default:
277         UNREACHABLE();
278     }
279   }
280 }
281 
282 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284                               ZoneList<int>* indices, unsigned int limit,
285                               Zone* zone) {
286   DCHECK(limit > 0);
287   // Collect indices of pattern in subject using memchr.
288   // Stop after finding at most limit values.
289   const uint8_t* subject_start = subject.start();
290   const uint8_t* subject_end = subject_start + subject.length();
291   const uint8_t* pos = subject_start;
292   while (limit > 0) {
293     pos = reinterpret_cast<const uint8_t*>(
294         memchr(pos, pattern, subject_end - pos));
295     if (pos == NULL) return;
296     indices->Add(static_cast<int>(pos - subject_start), zone);
297     pos++;
298     limit--;
299   }
300 }
301 
302 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
304                               ZoneList<int>* indices, unsigned int limit,
305                               Zone* zone) {
306   DCHECK(limit > 0);
307   const uc16* subject_start = subject.start();
308   const uc16* subject_end = subject_start + subject.length();
309   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
310     if (*pos == pattern) {
311       indices->Add(static_cast<int>(pos - subject_start), zone);
312       limit--;
313     }
314   }
315 }
316 
317 
318 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
320                        Vector<const PatternChar> pattern,
321                        ZoneList<int>* indices, unsigned int limit, Zone* zone) {
322   DCHECK(limit > 0);
323   // Collect indices of pattern in subject.
324   // Stop after finding at most limit values.
325   int pattern_length = pattern.length();
326   int index = 0;
327   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
328   while (limit > 0) {
329     index = search.Search(subject, index);
330     if (index < 0) return;
331     indices->Add(index, zone);
332     index += pattern_length;
333     limit--;
334   }
335 }
336 
337 
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)338 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
339                                String* pattern, ZoneList<int>* indices,
340                                unsigned int limit, Zone* zone) {
341   {
342     DisallowHeapAllocation no_gc;
343     String::FlatContent subject_content = subject->GetFlatContent();
344     String::FlatContent pattern_content = pattern->GetFlatContent();
345     DCHECK(subject_content.IsFlat());
346     DCHECK(pattern_content.IsFlat());
347     if (subject_content.IsOneByte()) {
348       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
349       if (pattern_content.IsOneByte()) {
350         Vector<const uint8_t> pattern_vector =
351             pattern_content.ToOneByteVector();
352         if (pattern_vector.length() == 1) {
353           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354                                    limit, zone);
355         } else {
356           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
357                             limit, zone);
358         }
359       } else {
360         FindStringIndices(isolate, subject_vector,
361                           pattern_content.ToUC16Vector(), indices, limit, zone);
362       }
363     } else {
364       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
365       if (pattern_content.IsOneByte()) {
366         Vector<const uint8_t> pattern_vector =
367             pattern_content.ToOneByteVector();
368         if (pattern_vector.length() == 1) {
369           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370                                    limit, zone);
371         } else {
372           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
373                             limit, zone);
374         }
375       } else {
376         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
377         if (pattern_vector.length() == 1) {
378           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379                                    limit, zone);
380         } else {
381           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
382                             limit, zone);
383         }
384       }
385     }
386   }
387 }
388 
389 
390 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<JSArray> last_match_info)391 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
392     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
393     Handle<String> replacement, Handle<JSArray> last_match_info) {
394   DCHECK(subject->IsFlat());
395   DCHECK(replacement->IsFlat());
396 
397   ZoneScope zone_scope(isolate->runtime_zone());
398   ZoneList<int> indices(8, zone_scope.zone());
399   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
400   String* pattern =
401       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
402   int subject_len = subject->length();
403   int pattern_len = pattern->length();
404   int replacement_len = replacement->length();
405 
406   FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407                             zone_scope.zone());
408 
409   int matches = indices.length();
410   if (matches == 0) return *subject;
411 
412   // Detect integer overflow.
413   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
414                            static_cast<int64_t>(pattern_len)) *
415                               static_cast<int64_t>(matches) +
416                           static_cast<int64_t>(subject_len);
417   int result_len;
418   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
419     STATIC_ASSERT(String::kMaxLength < kMaxInt);
420     result_len = kMaxInt;  // Provoke exception.
421   } else {
422     result_len = static_cast<int>(result_len_64);
423   }
424 
425   int subject_pos = 0;
426   int result_pos = 0;
427 
428   MaybeHandle<SeqString> maybe_res;
429   if (ResultSeqString::kHasOneByteEncoding) {
430     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
431   } else {
432     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
433   }
434   Handle<SeqString> untyped_res;
435   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
436   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
437 
438   for (int i = 0; i < matches; i++) {
439     // Copy non-matched subject content.
440     if (subject_pos < indices.at(i)) {
441       String::WriteToFlat(*subject, result->GetChars() + result_pos,
442                           subject_pos, indices.at(i));
443       result_pos += indices.at(i) - subject_pos;
444     }
445 
446     // Replace match.
447     if (replacement_len > 0) {
448       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
449                           replacement_len);
450       result_pos += replacement_len;
451     }
452 
453     subject_pos = indices.at(i) + pattern_len;
454   }
455   // Add remaining subject content at the end.
456   if (subject_pos < subject_len) {
457     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
458                         subject_len);
459   }
460 
461   int32_t match_indices[] = {indices.at(matches - 1),
462                              indices.at(matches - 1) + pattern_len};
463   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
464 
465   return *result;
466 }
467 
468 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<JSArray> last_match_info)469 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
470     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
471     Handle<String> replacement, Handle<JSArray> last_match_info) {
472   DCHECK(subject->IsFlat());
473   DCHECK(replacement->IsFlat());
474 
475   int capture_count = regexp->CaptureCount();
476   int subject_length = subject->length();
477 
478   // CompiledReplacement uses zone allocation.
479   ZoneScope zone_scope(isolate->runtime_zone());
480   CompiledReplacement compiled_replacement(zone_scope.zone());
481   bool simple_replace =
482       compiled_replacement.Compile(replacement, capture_count, subject_length);
483 
484   // Shortcut for simple non-regexp global replacements
485   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
486     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
487       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
488           isolate, subject, regexp, replacement, last_match_info);
489     } else {
490       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
491           isolate, subject, regexp, replacement, last_match_info);
492     }
493   }
494 
495   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
496   if (global_cache.HasException()) return isolate->heap()->exception();
497 
498   int32_t* current_match = global_cache.FetchNext();
499   if (current_match == NULL) {
500     if (global_cache.HasException()) return isolate->heap()->exception();
501     return *subject;
502   }
503 
504   // Guessing the number of parts that the final result string is built
505   // from. Global regexps can match any number of times, so we guess
506   // conservatively.
507   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
508   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
509 
510   // Number of parts added by compiled replacement plus preceeding
511   // string and possibly suffix after last match.  It is possible for
512   // all components to use two elements when encoded as two smis.
513   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
514 
515   int prev = 0;
516 
517   do {
518     builder.EnsureCapacity(parts_added_per_loop);
519 
520     int start = current_match[0];
521     int end = current_match[1];
522 
523     if (prev < start) {
524       builder.AddSubjectSlice(prev, start);
525     }
526 
527     if (simple_replace) {
528       builder.AddString(replacement);
529     } else {
530       compiled_replacement.Apply(&builder, start, end, current_match);
531     }
532     prev = end;
533 
534     current_match = global_cache.FetchNext();
535   } while (current_match != NULL);
536 
537   if (global_cache.HasException()) return isolate->heap()->exception();
538 
539   if (prev < subject_length) {
540     builder.EnsureCapacity(2);
541     builder.AddSubjectSlice(prev, subject_length);
542   }
543 
544   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
545                                global_cache.LastSuccessfulMatch());
546 
547   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
548 }
549 
550 
551 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<JSArray> last_match_info)552 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554     Handle<JSArray> last_match_info) {
555   DCHECK(subject->IsFlat());
556 
557   // Shortcut for simple non-regexp global replacements
558   if (regexp->TypeTag() == JSRegExp::ATOM) {
559     Handle<String> empty_string = isolate->factory()->empty_string();
560     if (subject->IsOneByteRepresentation()) {
561       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562           isolate, subject, regexp, empty_string, last_match_info);
563     } else {
564       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565           isolate, subject, regexp, empty_string, last_match_info);
566     }
567   }
568 
569   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
570   if (global_cache.HasException()) return isolate->heap()->exception();
571 
572   int32_t* current_match = global_cache.FetchNext();
573   if (current_match == NULL) {
574     if (global_cache.HasException()) return isolate->heap()->exception();
575     return *subject;
576   }
577 
578   int start = current_match[0];
579   int end = current_match[1];
580   int capture_count = regexp->CaptureCount();
581   int subject_length = subject->length();
582 
583   int new_length = subject_length - (end - start);
584   if (new_length == 0) return isolate->heap()->empty_string();
585 
586   Handle<ResultSeqString> answer;
587   if (ResultSeqString::kHasOneByteEncoding) {
588     answer = Handle<ResultSeqString>::cast(
589         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
590   } else {
591     answer = Handle<ResultSeqString>::cast(
592         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
593   }
594 
595   int prev = 0;
596   int position = 0;
597 
598   do {
599     start = current_match[0];
600     end = current_match[1];
601     if (prev < start) {
602       // Add substring subject[prev;start] to answer string.
603       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604       position += start - prev;
605     }
606     prev = end;
607 
608     current_match = global_cache.FetchNext();
609   } while (current_match != NULL);
610 
611   if (global_cache.HasException()) return isolate->heap()->exception();
612 
613   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614                                global_cache.LastSuccessfulMatch());
615 
616   if (prev < subject_length) {
617     // Add substring subject[prev;length] to answer string.
618     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
619                         subject_length);
620     position += subject_length - prev;
621   }
622 
623   if (position == 0) return isolate->heap()->empty_string();
624 
625   // Shorten string and fill
626   int string_size = ResultSeqString::SizeFor(position);
627   int allocated_string_size = ResultSeqString::SizeFor(new_length);
628   int delta = allocated_string_size - string_size;
629 
630   answer->set_length(position);
631   if (delta == 0) return *answer;
632 
633   Address end_of_string = answer->address() + string_size;
634   Heap* heap = isolate->heap();
635 
636   // The trimming is performed on a newly allocated object, which is on a
637   // fresly allocated page or on an already swept page. Hence, the sweeper
638   // thread can not get confused with the filler creation. No synchronization
639   // needed.
640   // TODO(hpayer): We should shrink the large object page if the size
641   // of the object changed significantly.
642   if (!heap->lo_space()->Contains(*answer)) {
643     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
644   }
645   heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
646   return *answer;
647 }
648 
649 
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString)650 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
651   HandleScope scope(isolate);
652   DCHECK(args.length() == 4);
653 
654   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
655   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
656   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
657   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
658 
659   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
660   CHECK(last_match_info->HasFastObjectElements());
661 
662   subject = String::Flatten(subject);
663 
664   if (replacement->length() == 0) {
665     if (subject->HasOnlyOneByteChars()) {
666       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
667           isolate, subject, regexp, last_match_info);
668     } else {
669       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
670           isolate, subject, regexp, last_match_info);
671     }
672   }
673 
674   replacement = String::Flatten(replacement);
675 
676   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
677                                              replacement, last_match_info);
678 }
679 
680 
RUNTIME_FUNCTION(Runtime_StringSplit)681 RUNTIME_FUNCTION(Runtime_StringSplit) {
682   HandleScope handle_scope(isolate);
683   DCHECK(args.length() == 3);
684   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
685   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
686   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
687   CHECK(limit > 0);
688 
689   int subject_length = subject->length();
690   int pattern_length = pattern->length();
691   CHECK(pattern_length > 0);
692 
693   if (limit == 0xffffffffu) {
694     FixedArray* last_match_cache_unused;
695     Handle<Object> cached_answer(
696         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
697                                    &last_match_cache_unused,
698                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
699         isolate);
700     if (*cached_answer != Smi::FromInt(0)) {
701       // The cache FixedArray is a COW-array and can therefore be reused.
702       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
703           Handle<FixedArray>::cast(cached_answer));
704       return *result;
705     }
706   }
707 
708   // The limit can be very large (0xffffffffu), but since the pattern
709   // isn't empty, we can never create more parts than ~half the length
710   // of the subject.
711 
712   subject = String::Flatten(subject);
713   pattern = String::Flatten(pattern);
714 
715   static const int kMaxInitialListCapacity = 16;
716 
717   ZoneScope zone_scope(isolate->runtime_zone());
718 
719   // Find (up to limit) indices of separator and end-of-string in subject
720   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
721   ZoneList<int> indices(initial_capacity, zone_scope.zone());
722 
723   FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
724                             zone_scope.zone());
725 
726   if (static_cast<uint32_t>(indices.length()) < limit) {
727     indices.Add(subject_length, zone_scope.zone());
728   }
729 
730   // The list indices now contains the end of each part to create.
731 
732   // Create JSArray of substrings separated by separator.
733   int part_count = indices.length();
734 
735   Handle<JSArray> result =
736       isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
737                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
738 
739   DCHECK(result->HasFastObjectElements());
740 
741   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
742 
743   if (part_count == 1 && indices.at(0) == subject_length) {
744     elements->set(0, *subject);
745   } else {
746     int part_start = 0;
747     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
748       int part_end = indices.at(i);
749       Handle<String> substring =
750           isolate->factory()->NewProperSubString(subject, part_start, part_end);
751       elements->set(i, *substring);
752       part_start = part_end + pattern_length;
753     });
754   }
755 
756   if (limit == 0xffffffffu) {
757     if (result->HasFastObjectElements()) {
758       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
759                                 isolate->factory()->empty_fixed_array(),
760                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
761     }
762   }
763 
764   return *result;
765 }
766 
767 
RUNTIME_FUNCTION(Runtime_RegExpExec)768 RUNTIME_FUNCTION(Runtime_RegExpExec) {
769   HandleScope scope(isolate);
770   DCHECK(args.length() == 4);
771   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
772   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
773   CONVERT_INT32_ARG_CHECKED(index, 2);
774   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
775   // Due to the way the JS calls are constructed this must be less than the
776   // length of a string, i.e. it is always a Smi.  We check anyway for security.
777   CHECK(index >= 0);
778   CHECK(index <= subject->length());
779   isolate->counters()->regexp_entry_runtime()->Increment();
780   RETURN_RESULT_OR_FAILURE(
781       isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
782 }
783 
784 
RUNTIME_FUNCTION(Runtime_RegExpFlags)785 RUNTIME_FUNCTION(Runtime_RegExpFlags) {
786   SealHandleScope shs(isolate);
787   DCHECK(args.length() == 1);
788   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
789   return regexp->flags();
790 }
791 
792 
RUNTIME_FUNCTION(Runtime_RegExpSource)793 RUNTIME_FUNCTION(Runtime_RegExpSource) {
794   SealHandleScope shs(isolate);
795   DCHECK(args.length() == 1);
796   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
797   return regexp->source();
798 }
799 
800 
RUNTIME_FUNCTION(Runtime_RegExpConstructResult)801 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
802   HandleScope handle_scope(isolate);
803   DCHECK(args.length() == 3);
804   CONVERT_SMI_ARG_CHECKED(size, 0);
805   CHECK(size >= 0 && size <= FixedArray::kMaxLength);
806   CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
807   CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
808   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
809   Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
810   Handle<JSObject> object =
811       isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
812   Handle<JSArray> array = Handle<JSArray>::cast(object);
813   array->set_elements(*elements);
814   array->set_length(Smi::FromInt(size));
815   // Write in-object properties after the length of the array.
816   array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
817   array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
818   return *array;
819 }
820 
821 
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)822 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
823   HandleScope scope(isolate);
824   DCHECK(args.length() == 3);
825   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
826   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
827   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
828 
829   RETURN_FAILURE_ON_EXCEPTION(isolate,
830                               JSRegExp::Initialize(regexp, source, flags));
831 
832   return *regexp;
833 }
834 
835 
836 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
837 // separate last match info.  See comment on that function.
838 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<JSArray> last_match_array,Handle<JSArray> result_array)839 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
840                                     Handle<JSRegExp> regexp,
841                                     Handle<JSArray> last_match_array,
842                                     Handle<JSArray> result_array) {
843   DCHECK(subject->IsFlat());
844   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
845 
846   int capture_count = regexp->CaptureCount();
847   int subject_length = subject->length();
848 
849   static const int kMinLengthToCache = 0x1000;
850 
851   if (subject_length > kMinLengthToCache) {
852     FixedArray* last_match_cache;
853     Object* cached_answer = RegExpResultsCache::Lookup(
854         isolate->heap(), *subject, regexp->data(), &last_match_cache,
855         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
856     if (cached_answer->IsFixedArray()) {
857       int capture_registers = (capture_count + 1) * 2;
858       int32_t* last_match = NewArray<int32_t>(capture_registers);
859       for (int i = 0; i < capture_registers; i++) {
860         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
861       }
862       Handle<FixedArray> cached_fixed_array =
863           Handle<FixedArray>(FixedArray::cast(cached_answer));
864       // The cache FixedArray is a COW-array and can therefore be reused.
865       JSArray::SetContent(result_array, cached_fixed_array);
866       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
867                                    last_match);
868       DeleteArray(last_match);
869       return *result_array;
870     }
871   }
872 
873   RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
874   if (global_cache.HasException()) return isolate->heap()->exception();
875 
876   // Ensured in Runtime_RegExpExecMultiple.
877   DCHECK(result_array->HasFastObjectElements());
878   Handle<FixedArray> result_elements(
879       FixedArray::cast(result_array->elements()));
880   if (result_elements->length() < 16) {
881     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
882   }
883 
884   FixedArrayBuilder builder(result_elements);
885 
886   // Position to search from.
887   int match_start = -1;
888   int match_end = 0;
889   bool first = true;
890 
891   // Two smis before and after the match, for very long strings.
892   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
893 
894   while (true) {
895     int32_t* current_match = global_cache.FetchNext();
896     if (current_match == NULL) break;
897     match_start = current_match[0];
898     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
899     if (match_end < match_start) {
900       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
901                                                 match_start);
902     }
903     match_end = current_match[1];
904     {
905       // Avoid accumulating new handles inside loop.
906       HandleScope temp_scope(isolate);
907       Handle<String> match;
908       if (!first) {
909         match = isolate->factory()->NewProperSubString(subject, match_start,
910                                                        match_end);
911       } else {
912         match =
913             isolate->factory()->NewSubString(subject, match_start, match_end);
914         first = false;
915       }
916 
917       if (has_capture) {
918         // Arguments array to replace function is match, captures, index and
919         // subject, i.e., 3 + capture count in total.
920         Handle<FixedArray> elements =
921             isolate->factory()->NewFixedArray(3 + capture_count);
922 
923         elements->set(0, *match);
924         for (int i = 1; i <= capture_count; i++) {
925           int start = current_match[i * 2];
926           if (start >= 0) {
927             int end = current_match[i * 2 + 1];
928             DCHECK(start <= end);
929             Handle<String> substring =
930                 isolate->factory()->NewSubString(subject, start, end);
931             elements->set(i, *substring);
932           } else {
933             DCHECK(current_match[i * 2 + 1] < 0);
934             elements->set(i, isolate->heap()->undefined_value());
935           }
936         }
937         elements->set(capture_count + 1, Smi::FromInt(match_start));
938         elements->set(capture_count + 2, *subject);
939         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
940       } else {
941         builder.Add(*match);
942       }
943     }
944   }
945 
946   if (global_cache.HasException()) return isolate->heap()->exception();
947 
948   if (match_start >= 0) {
949     // Finished matching, with at least one match.
950     if (match_end < subject_length) {
951       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
952                                                 subject_length);
953     }
954 
955     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
956                                  global_cache.LastSuccessfulMatch());
957 
958     if (subject_length > kMinLengthToCache) {
959       // Store the last successful match into the array for caching.
960       // TODO(yangguo): do not expose last match to JS and simplify caching.
961       int capture_registers = (capture_count + 1) * 2;
962       Handle<FixedArray> last_match_cache =
963           isolate->factory()->NewFixedArray(capture_registers);
964       int32_t* last_match = global_cache.LastSuccessfulMatch();
965       for (int i = 0; i < capture_registers; i++) {
966         last_match_cache->set(i, Smi::FromInt(last_match[i]));
967       }
968       Handle<FixedArray> result_fixed_array = builder.array();
969       result_fixed_array->Shrink(builder.length());
970       // Cache the result and turn the FixedArray into a COW array.
971       RegExpResultsCache::Enter(
972           isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
973           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
974     }
975     return *builder.ToJSArray(result_array);
976   } else {
977     return isolate->heap()->null_value();  // No matches at all.
978   }
979 }
980 
981 
982 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
983 // lastMatchInfoOverride to maintain the last match info, so we don't need to
984 // set any other last match array info.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)985 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
986   HandleScope handles(isolate);
987   DCHECK(args.length() == 4);
988 
989   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
990   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
991   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
992   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
993   CHECK(last_match_info->HasFastObjectElements());
994   CHECK(result_array->HasFastObjectElements());
995 
996   subject = String::Flatten(subject);
997   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
998 
999   if (regexp->CaptureCount() == 0) {
1000     return SearchRegExpMultiple<false>(isolate, subject, regexp,
1001                                        last_match_info, result_array);
1002   } else {
1003     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1004                                       result_array);
1005   }
1006 }
1007 
1008 
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow)1009 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1010   SealHandleScope shs(isolate);
1011   DCHECK(args.length() == 4);
1012   Object* exception = isolate->pending_exception();
1013   isolate->clear_pending_exception();
1014   return isolate->ReThrow(exception);
1015 }
1016 
1017 
RUNTIME_FUNCTION(Runtime_IsRegExp)1018 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1019   SealHandleScope shs(isolate);
1020   DCHECK(args.length() == 1);
1021   CONVERT_ARG_CHECKED(Object, obj, 0);
1022   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1023 }
1024 }  // namespace internal
1025 }  // namespace v8
1026