1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/runtime/runtime-utils.h"
6
7 #include "src/arguments.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h"
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder.h"
15 #include "src/string-search.h"
16
17 namespace v8 {
18 namespace internal {
19
20 class CompiledReplacement {
21 public:
CompiledReplacement(Zone * zone)22 explicit CompiledReplacement(Zone* zone)
23 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
24
25 // Return whether the replacement is simple.
26 bool Compile(Handle<String> replacement, int capture_count,
27 int subject_length);
28
29 // Use Apply only if Compile returned false.
30 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
31 int32_t* match);
32
33 // Number of distinct parts of the replacement pattern.
parts()34 int parts() { return parts_.length(); }
35
zone() const36 Zone* zone() const { return zone_; }
37
38 private:
39 enum PartType {
40 SUBJECT_PREFIX = 1,
41 SUBJECT_SUFFIX,
42 SUBJECT_CAPTURE,
43 REPLACEMENT_SUBSTRING,
44 REPLACEMENT_STRING,
45 NUMBER_OF_PART_TYPES
46 };
47
48 struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart49 static inline ReplacementPart SubjectMatch() {
50 return ReplacementPart(SUBJECT_CAPTURE, 0);
51 }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart52 static inline ReplacementPart SubjectCapture(int capture_index) {
53 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
54 }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart55 static inline ReplacementPart SubjectPrefix() {
56 return ReplacementPart(SUBJECT_PREFIX, 0);
57 }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart58 static inline ReplacementPart SubjectSuffix(int subject_length) {
59 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
60 }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart61 static inline ReplacementPart ReplacementString() {
62 return ReplacementPart(REPLACEMENT_STRING, 0);
63 }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart64 static inline ReplacementPart ReplacementSubString(int from, int to) {
65 DCHECK(from >= 0);
66 DCHECK(to > from);
67 return ReplacementPart(-from, to);
68 }
69
70 // If tag <= 0 then it is the negation of a start index of a substring of
71 // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart72 ReplacementPart(int tag, int data) : tag(tag), data(data) {
73 // Must be non-positive or a PartType value.
74 DCHECK(tag < NUMBER_OF_PART_TYPES);
75 }
76 // Either a value of PartType or a non-positive number that is
77 // the negation of an index into the replacement string.
78 int tag;
79 // The data value's interpretation depends on the value of tag:
80 // tag == SUBJECT_PREFIX ||
81 // tag == SUBJECT_SUFFIX: data is unused.
82 // tag == SUBJECT_CAPTURE: data is the number of the capture.
83 // tag == REPLACEMENT_SUBSTRING ||
84 // tag == REPLACEMENT_STRING: data is index into array of substrings
85 // of the replacement string.
86 // tag <= 0: Temporary representation of the substring of the replacement
87 // string ranging over -tag .. data.
88 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
89 // substring objects.
90 int data;
91 };
92
93 template <typename Char>
ParseReplacementPattern(ZoneList<ReplacementPart> * parts,Vector<Char> characters,int capture_count,int subject_length,Zone * zone)94 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
95 Vector<Char> characters, int capture_count,
96 int subject_length, Zone* zone) {
97 int length = characters.length();
98 int last = 0;
99 for (int i = 0; i < length; i++) {
100 Char c = characters[i];
101 if (c == '$') {
102 int next_index = i + 1;
103 if (next_index == length) { // No next character!
104 break;
105 }
106 Char c2 = characters[next_index];
107 switch (c2) {
108 case '$':
109 if (i > last) {
110 // There is a substring before. Include the first "$".
111 parts->Add(
112 ReplacementPart::ReplacementSubString(last, next_index),
113 zone);
114 last = next_index + 1; // Continue after the second "$".
115 } else {
116 // Let the next substring start with the second "$".
117 last = next_index;
118 }
119 i = next_index;
120 break;
121 case '`':
122 if (i > last) {
123 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
124 }
125 parts->Add(ReplacementPart::SubjectPrefix(), zone);
126 i = next_index;
127 last = i + 1;
128 break;
129 case '\'':
130 if (i > last) {
131 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
132 }
133 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
134 i = next_index;
135 last = i + 1;
136 break;
137 case '&':
138 if (i > last) {
139 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
140 }
141 parts->Add(ReplacementPart::SubjectMatch(), zone);
142 i = next_index;
143 last = i + 1;
144 break;
145 case '0':
146 case '1':
147 case '2':
148 case '3':
149 case '4':
150 case '5':
151 case '6':
152 case '7':
153 case '8':
154 case '9': {
155 int capture_ref = c2 - '0';
156 if (capture_ref > capture_count) {
157 i = next_index;
158 continue;
159 }
160 int second_digit_index = next_index + 1;
161 if (second_digit_index < length) {
162 // Peek ahead to see if we have two digits.
163 Char c3 = characters[second_digit_index];
164 if ('0' <= c3 && c3 <= '9') { // Double digits.
165 int double_digit_ref = capture_ref * 10 + c3 - '0';
166 if (double_digit_ref <= capture_count) {
167 next_index = second_digit_index;
168 capture_ref = double_digit_ref;
169 }
170 }
171 }
172 if (capture_ref > 0) {
173 if (i > last) {
174 parts->Add(ReplacementPart::ReplacementSubString(last, i),
175 zone);
176 }
177 DCHECK(capture_ref <= capture_count);
178 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
179 last = next_index + 1;
180 }
181 i = next_index;
182 break;
183 }
184 default:
185 i = next_index;
186 break;
187 }
188 }
189 }
190 if (length > last) {
191 if (last == 0) {
192 // Replacement is simple. Do not use Apply to do the replacement.
193 return true;
194 } else {
195 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
196 }
197 }
198 return false;
199 }
200
201 ZoneList<ReplacementPart> parts_;
202 ZoneList<Handle<String> > replacement_substrings_;
203 Zone* zone_;
204 };
205
206
Compile(Handle<String> replacement,int capture_count,int subject_length)207 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
208 int subject_length) {
209 {
210 DisallowHeapAllocation no_gc;
211 String::FlatContent content = replacement->GetFlatContent();
212 DCHECK(content.IsFlat());
213 bool simple = false;
214 if (content.IsOneByte()) {
215 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
216 capture_count, subject_length, zone());
217 } else {
218 DCHECK(content.IsTwoByte());
219 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
220 capture_count, subject_length, zone());
221 }
222 if (simple) return true;
223 }
224
225 Isolate* isolate = replacement->GetIsolate();
226 // Find substrings of replacement string and create them as String objects.
227 int substring_index = 0;
228 for (int i = 0, n = parts_.length(); i < n; i++) {
229 int tag = parts_[i].tag;
230 if (tag <= 0) { // A replacement string slice.
231 int from = -tag;
232 int to = parts_[i].data;
233 replacement_substrings_.Add(
234 isolate->factory()->NewSubString(replacement, from, to), zone());
235 parts_[i].tag = REPLACEMENT_SUBSTRING;
236 parts_[i].data = substring_index;
237 substring_index++;
238 } else if (tag == REPLACEMENT_STRING) {
239 replacement_substrings_.Add(replacement, zone());
240 parts_[i].data = substring_index;
241 substring_index++;
242 }
243 }
244 return false;
245 }
246
247
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)248 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
249 int match_from, int match_to, int32_t* match) {
250 DCHECK_LT(0, parts_.length());
251 for (int i = 0, n = parts_.length(); i < n; i++) {
252 ReplacementPart part = parts_[i];
253 switch (part.tag) {
254 case SUBJECT_PREFIX:
255 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
256 break;
257 case SUBJECT_SUFFIX: {
258 int subject_length = part.data;
259 if (match_to < subject_length) {
260 builder->AddSubjectSlice(match_to, subject_length);
261 }
262 break;
263 }
264 case SUBJECT_CAPTURE: {
265 int capture = part.data;
266 int from = match[capture * 2];
267 int to = match[capture * 2 + 1];
268 if (from >= 0 && to > from) {
269 builder->AddSubjectSlice(from, to);
270 }
271 break;
272 }
273 case REPLACEMENT_SUBSTRING:
274 case REPLACEMENT_STRING:
275 builder->AddString(replacement_substrings_[part.data]);
276 break;
277 default:
278 UNREACHABLE();
279 }
280 }
281 }
282
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,List<int> * indices,unsigned int limit)283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284 List<int>* indices, unsigned int limit) {
285 DCHECK(limit > 0);
286 // Collect indices of pattern in subject using memchr.
287 // Stop after finding at most limit values.
288 const uint8_t* subject_start = subject.start();
289 const uint8_t* subject_end = subject_start + subject.length();
290 const uint8_t* pos = subject_start;
291 while (limit > 0) {
292 pos = reinterpret_cast<const uint8_t*>(
293 memchr(pos, pattern, subject_end - pos));
294 if (pos == NULL) return;
295 indices->Add(static_cast<int>(pos - subject_start));
296 pos++;
297 limit--;
298 }
299 }
300
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,List<int> * indices,unsigned int limit)301 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
302 List<int>* indices, unsigned int limit) {
303 DCHECK(limit > 0);
304 const uc16* subject_start = subject.start();
305 const uc16* subject_end = subject_start + subject.length();
306 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
307 if (*pos == pattern) {
308 indices->Add(static_cast<int>(pos - subject_start));
309 limit--;
310 }
311 }
312 }
313
314 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,List<int> * indices,unsigned int limit)315 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
316 Vector<const PatternChar> pattern, List<int>* indices,
317 unsigned int limit) {
318 DCHECK(limit > 0);
319 // Collect indices of pattern in subject.
320 // Stop after finding at most limit values.
321 int pattern_length = pattern.length();
322 int index = 0;
323 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
324 while (limit > 0) {
325 index = search.Search(subject, index);
326 if (index < 0) return;
327 indices->Add(index);
328 index += pattern_length;
329 limit--;
330 }
331 }
332
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,List<int> * indices,unsigned int limit)333 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
334 String* pattern, List<int>* indices,
335 unsigned int limit) {
336 {
337 DisallowHeapAllocation no_gc;
338 String::FlatContent subject_content = subject->GetFlatContent();
339 String::FlatContent pattern_content = pattern->GetFlatContent();
340 DCHECK(subject_content.IsFlat());
341 DCHECK(pattern_content.IsFlat());
342 if (subject_content.IsOneByte()) {
343 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
344 if (pattern_content.IsOneByte()) {
345 Vector<const uint8_t> pattern_vector =
346 pattern_content.ToOneByteVector();
347 if (pattern_vector.length() == 1) {
348 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
349 limit);
350 } else {
351 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
352 limit);
353 }
354 } else {
355 FindStringIndices(isolate, subject_vector,
356 pattern_content.ToUC16Vector(), indices, limit);
357 }
358 } else {
359 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
360 if (pattern_content.IsOneByte()) {
361 Vector<const uint8_t> pattern_vector =
362 pattern_content.ToOneByteVector();
363 if (pattern_vector.length() == 1) {
364 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
365 limit);
366 } else {
367 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
368 limit);
369 }
370 } else {
371 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
372 if (pattern_vector.length() == 1) {
373 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
374 limit);
375 } else {
376 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
377 limit);
378 }
379 }
380 }
381 }
382 }
383
384 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)385 List<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
386 List<int>* list = isolate->regexp_indices();
387 list->Rewind(0);
388 return list;
389 }
390
TruncateRegexpIndicesList(Isolate * isolate)391 void TruncateRegexpIndicesList(Isolate* isolate) {
392 // Same size as smallest zone segment, preserving behavior from the
393 // runtime zone.
394 static const int kMaxRegexpIndicesListCapacity = 8 * KB;
395 if (isolate->regexp_indices()->capacity() > kMaxRegexpIndicesListCapacity) {
396 isolate->regexp_indices()->Clear(); // Throw away backing storage
397 }
398 }
399 } // namespace
400
401 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)402 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
403 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
404 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
405 DCHECK(subject->IsFlat());
406 DCHECK(replacement->IsFlat());
407
408 List<int>* indices = GetRewoundRegexpIndicesList(isolate);
409
410 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
411 String* pattern =
412 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
413 int subject_len = subject->length();
414 int pattern_len = pattern->length();
415 int replacement_len = replacement->length();
416
417 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff);
418
419 int matches = indices->length();
420 if (matches == 0) return *subject;
421
422 // Detect integer overflow.
423 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
424 static_cast<int64_t>(pattern_len)) *
425 static_cast<int64_t>(matches) +
426 static_cast<int64_t>(subject_len);
427 int result_len;
428 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
429 STATIC_ASSERT(String::kMaxLength < kMaxInt);
430 result_len = kMaxInt; // Provoke exception.
431 } else {
432 result_len = static_cast<int>(result_len_64);
433 }
434 if (result_len == 0) {
435 return isolate->heap()->empty_string();
436 }
437
438 int subject_pos = 0;
439 int result_pos = 0;
440
441 MaybeHandle<SeqString> maybe_res;
442 if (ResultSeqString::kHasOneByteEncoding) {
443 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
444 } else {
445 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
446 }
447 Handle<SeqString> untyped_res;
448 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
449 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
450
451 for (int i = 0; i < matches; i++) {
452 // Copy non-matched subject content.
453 if (subject_pos < indices->at(i)) {
454 String::WriteToFlat(*subject, result->GetChars() + result_pos,
455 subject_pos, indices->at(i));
456 result_pos += indices->at(i) - subject_pos;
457 }
458
459 // Replace match.
460 if (replacement_len > 0) {
461 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
462 replacement_len);
463 result_pos += replacement_len;
464 }
465
466 subject_pos = indices->at(i) + pattern_len;
467 }
468 // Add remaining subject content at the end.
469 if (subject_pos < subject_len) {
470 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
471 subject_len);
472 }
473
474 int32_t match_indices[] = {indices->at(matches - 1),
475 indices->at(matches - 1) + pattern_len};
476 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
477
478 TruncateRegexpIndicesList(isolate);
479
480 return *result;
481 }
482
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)483 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
484 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
485 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
486 DCHECK(subject->IsFlat());
487 DCHECK(replacement->IsFlat());
488
489 int capture_count = regexp->CaptureCount();
490 int subject_length = subject->length();
491
492 // CompiledReplacement uses zone allocation.
493 Zone zone(isolate->allocator(), ZONE_NAME);
494 CompiledReplacement compiled_replacement(&zone);
495 bool simple_replace =
496 compiled_replacement.Compile(replacement, capture_count, subject_length);
497
498 // Shortcut for simple non-regexp global replacements
499 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
500 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
501 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
502 isolate, subject, regexp, replacement, last_match_info);
503 } else {
504 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
505 isolate, subject, regexp, replacement, last_match_info);
506 }
507 }
508
509 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
510 if (global_cache.HasException()) return isolate->heap()->exception();
511
512 int32_t* current_match = global_cache.FetchNext();
513 if (current_match == NULL) {
514 if (global_cache.HasException()) return isolate->heap()->exception();
515 return *subject;
516 }
517
518 // Guessing the number of parts that the final result string is built
519 // from. Global regexps can match any number of times, so we guess
520 // conservatively.
521 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
522 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
523
524 // Number of parts added by compiled replacement plus preceeding
525 // string and possibly suffix after last match. It is possible for
526 // all components to use two elements when encoded as two smis.
527 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
528
529 int prev = 0;
530
531 do {
532 builder.EnsureCapacity(parts_added_per_loop);
533
534 int start = current_match[0];
535 int end = current_match[1];
536
537 if (prev < start) {
538 builder.AddSubjectSlice(prev, start);
539 }
540
541 if (simple_replace) {
542 builder.AddString(replacement);
543 } else {
544 compiled_replacement.Apply(&builder, start, end, current_match);
545 }
546 prev = end;
547
548 current_match = global_cache.FetchNext();
549 } while (current_match != NULL);
550
551 if (global_cache.HasException()) return isolate->heap()->exception();
552
553 if (prev < subject_length) {
554 builder.EnsureCapacity(2);
555 builder.AddSubjectSlice(prev, subject_length);
556 }
557
558 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
559 global_cache.LastSuccessfulMatch());
560
561 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
562 }
563
564 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)565 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
566 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
567 Handle<RegExpMatchInfo> last_match_info) {
568 DCHECK(subject->IsFlat());
569
570 // Shortcut for simple non-regexp global replacements
571 if (regexp->TypeTag() == JSRegExp::ATOM) {
572 Handle<String> empty_string = isolate->factory()->empty_string();
573 if (subject->IsOneByteRepresentation()) {
574 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
575 isolate, subject, regexp, empty_string, last_match_info);
576 } else {
577 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
578 isolate, subject, regexp, empty_string, last_match_info);
579 }
580 }
581
582 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
583 if (global_cache.HasException()) return isolate->heap()->exception();
584
585 int32_t* current_match = global_cache.FetchNext();
586 if (current_match == NULL) {
587 if (global_cache.HasException()) return isolate->heap()->exception();
588 return *subject;
589 }
590
591 int start = current_match[0];
592 int end = current_match[1];
593 int capture_count = regexp->CaptureCount();
594 int subject_length = subject->length();
595
596 int new_length = subject_length - (end - start);
597 if (new_length == 0) return isolate->heap()->empty_string();
598
599 Handle<ResultSeqString> answer;
600 if (ResultSeqString::kHasOneByteEncoding) {
601 answer = Handle<ResultSeqString>::cast(
602 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
603 } else {
604 answer = Handle<ResultSeqString>::cast(
605 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
606 }
607
608 int prev = 0;
609 int position = 0;
610
611 do {
612 start = current_match[0];
613 end = current_match[1];
614 if (prev < start) {
615 // Add substring subject[prev;start] to answer string.
616 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
617 position += start - prev;
618 }
619 prev = end;
620
621 current_match = global_cache.FetchNext();
622 } while (current_match != NULL);
623
624 if (global_cache.HasException()) return isolate->heap()->exception();
625
626 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
627 global_cache.LastSuccessfulMatch());
628
629 if (prev < subject_length) {
630 // Add substring subject[prev;length] to answer string.
631 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
632 subject_length);
633 position += subject_length - prev;
634 }
635
636 if (position == 0) return isolate->heap()->empty_string();
637
638 // Shorten string and fill
639 int string_size = ResultSeqString::SizeFor(position);
640 int allocated_string_size = ResultSeqString::SizeFor(new_length);
641 int delta = allocated_string_size - string_size;
642
643 answer->set_length(position);
644 if (delta == 0) return *answer;
645
646 Address end_of_string = answer->address() + string_size;
647 Heap* heap = isolate->heap();
648
649 // The trimming is performed on a newly allocated object, which is on a
650 // fresly allocated page or on an already swept page. Hence, the sweeper
651 // thread can not get confused with the filler creation. No synchronization
652 // needed.
653 // TODO(hpayer): We should shrink the large object page if the size
654 // of the object changed significantly.
655 if (!heap->lo_space()->Contains(*answer)) {
656 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
657 }
658 heap->AdjustLiveBytes(*answer, -delta);
659 return *answer;
660 }
661
662 namespace {
663
StringReplaceGlobalRegExpWithStringHelper(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)664 Object* StringReplaceGlobalRegExpWithStringHelper(
665 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
666 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
667 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
668
669 subject = String::Flatten(subject);
670
671 if (replacement->length() == 0) {
672 if (subject->HasOnlyOneByteChars()) {
673 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
674 isolate, subject, regexp, last_match_info);
675 } else {
676 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
677 isolate, subject, regexp, last_match_info);
678 }
679 }
680
681 replacement = String::Flatten(replacement);
682
683 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
684 replacement, last_match_info);
685 }
686
687 } // namespace
688
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString)689 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
690 HandleScope scope(isolate);
691 DCHECK_EQ(4, args.length());
692
693 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
694 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
695 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
696 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
697
698 return StringReplaceGlobalRegExpWithStringHelper(
699 isolate, regexp, subject, replacement, last_match_info);
700 }
701
RUNTIME_FUNCTION(Runtime_StringSplit)702 RUNTIME_FUNCTION(Runtime_StringSplit) {
703 HandleScope handle_scope(isolate);
704 DCHECK_EQ(3, args.length());
705 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
706 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
707 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
708 CHECK(limit > 0);
709
710 int subject_length = subject->length();
711 int pattern_length = pattern->length();
712 CHECK(pattern_length > 0);
713
714 if (limit == 0xffffffffu) {
715 FixedArray* last_match_cache_unused;
716 Handle<Object> cached_answer(
717 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
718 &last_match_cache_unused,
719 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
720 isolate);
721 if (*cached_answer != Smi::kZero) {
722 // The cache FixedArray is a COW-array and can therefore be reused.
723 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
724 Handle<FixedArray>::cast(cached_answer));
725 return *result;
726 }
727 }
728
729 // The limit can be very large (0xffffffffu), but since the pattern
730 // isn't empty, we can never create more parts than ~half the length
731 // of the subject.
732
733 subject = String::Flatten(subject);
734 pattern = String::Flatten(pattern);
735
736 List<int>* indices = GetRewoundRegexpIndicesList(isolate);
737
738 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
739
740 if (static_cast<uint32_t>(indices->length()) < limit) {
741 indices->Add(subject_length);
742 }
743
744 // The list indices now contains the end of each part to create.
745
746 // Create JSArray of substrings separated by separator.
747 int part_count = indices->length();
748
749 Handle<JSArray> result =
750 isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
751 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
752
753 DCHECK(result->HasFastObjectElements());
754
755 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
756
757 if (part_count == 1 && indices->at(0) == subject_length) {
758 elements->set(0, *subject);
759 } else {
760 int part_start = 0;
761 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
762 int part_end = indices->at(i);
763 Handle<String> substring =
764 isolate->factory()->NewProperSubString(subject, part_start, part_end);
765 elements->set(i, *substring);
766 part_start = part_end + pattern_length;
767 });
768 }
769
770 if (limit == 0xffffffffu) {
771 if (result->HasFastObjectElements()) {
772 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
773 isolate->factory()->empty_fixed_array(),
774 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
775 }
776 }
777
778 TruncateRegexpIndicesList(isolate);
779
780 return *result;
781 }
782
783 // ES##sec-regexpcreate
784 // RegExpCreate ( P, F )
RUNTIME_FUNCTION(Runtime_RegExpCreate)785 RUNTIME_FUNCTION(Runtime_RegExpCreate) {
786 HandleScope scope(isolate);
787 DCHECK_EQ(1, args.length());
788 CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0);
789
790 Handle<String> source;
791 if (source_object->IsUndefined(isolate)) {
792 source = isolate->factory()->empty_string();
793 } else {
794 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
795 isolate, source, Object::ToString(isolate, source_object));
796 }
797
798 Handle<Map> map(isolate->regexp_function()->initial_map());
799 Handle<JSRegExp> regexp =
800 Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map));
801
802 JSRegExp::Flags flags = JSRegExp::kNone;
803
804 RETURN_FAILURE_ON_EXCEPTION(isolate,
805 JSRegExp::Initialize(regexp, source, flags));
806
807 return *regexp;
808 }
809
RUNTIME_FUNCTION(Runtime_RegExpExec)810 RUNTIME_FUNCTION(Runtime_RegExpExec) {
811 HandleScope scope(isolate);
812 DCHECK_EQ(4, args.length());
813 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
814 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
815 CONVERT_INT32_ARG_CHECKED(index, 2);
816 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
817 // Due to the way the JS calls are constructed this must be less than the
818 // length of a string, i.e. it is always a Smi. We check anyway for security.
819 CHECK(index >= 0);
820 CHECK(index <= subject->length());
821 isolate->counters()->regexp_entry_runtime()->Increment();
822 RETURN_RESULT_OR_FAILURE(
823 isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
824 }
825
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace)826 RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
827 HandleScope scope(isolate);
828 DCHECK_EQ(3, args.length());
829 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
830 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
831 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
832
833 Handle<RegExpMatchInfo> internal_match_info =
834 isolate->regexp_internal_match_info();
835
836 return StringReplaceGlobalRegExpWithStringHelper(
837 isolate, regexp, subject, replacement, internal_match_info);
838 }
839
840 namespace {
841
842 class MatchInfoBackedMatch : public String::Match {
843 public:
MatchInfoBackedMatch(Isolate * isolate,Handle<String> subject,Handle<RegExpMatchInfo> match_info)844 MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
845 Handle<RegExpMatchInfo> match_info)
846 : isolate_(isolate), match_info_(match_info) {
847 subject_ = String::Flatten(subject);
848 }
849
GetMatch()850 Handle<String> GetMatch() override {
851 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
852 }
853
GetCapture(int i,bool * capture_exists)854 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
855 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
856 isolate_, match_info_, i, capture_exists);
857 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
858 : isolate_->factory()->empty_string();
859 }
860
GetPrefix()861 Handle<String> GetPrefix() override {
862 const int match_start = match_info_->Capture(0);
863 return isolate_->factory()->NewSubString(subject_, 0, match_start);
864 }
865
GetSuffix()866 Handle<String> GetSuffix() override {
867 const int match_end = match_info_->Capture(1);
868 return isolate_->factory()->NewSubString(subject_, match_end,
869 subject_->length());
870 }
871
CaptureCount()872 int CaptureCount() override {
873 return match_info_->NumberOfCaptureRegisters() / 2;
874 }
875
~MatchInfoBackedMatch()876 virtual ~MatchInfoBackedMatch() {}
877
878 private:
879 Isolate* isolate_;
880 Handle<String> subject_;
881 Handle<RegExpMatchInfo> match_info_;
882 };
883
884 class VectorBackedMatch : public String::Match {
885 public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures)886 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
887 Handle<String> match, int match_position,
888 ZoneVector<Handle<Object>>* captures)
889 : isolate_(isolate),
890 match_(match),
891 match_position_(match_position),
892 captures_(captures) {
893 subject_ = String::Flatten(subject);
894 }
895
GetMatch()896 Handle<String> GetMatch() override { return match_; }
897
GetCapture(int i,bool * capture_exists)898 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
899 Handle<Object> capture_obj = captures_->at(i);
900 if (capture_obj->IsUndefined(isolate_)) {
901 *capture_exists = false;
902 return isolate_->factory()->empty_string();
903 }
904 *capture_exists = true;
905 return Object::ToString(isolate_, capture_obj);
906 }
907
GetPrefix()908 Handle<String> GetPrefix() override {
909 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
910 }
911
GetSuffix()912 Handle<String> GetSuffix() override {
913 const int match_end_position = match_position_ + match_->length();
914 return isolate_->factory()->NewSubString(subject_, match_end_position,
915 subject_->length());
916 }
917
CaptureCount()918 int CaptureCount() override { return static_cast<int>(captures_->size()); }
919
~VectorBackedMatch()920 virtual ~VectorBackedMatch() {}
921
922 private:
923 Isolate* isolate_;
924 Handle<String> subject_;
925 Handle<String> match_;
926 const int match_position_;
927 ZoneVector<Handle<Object>>* captures_;
928 };
929
930 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
931 // separate last match info. See comment on that function.
932 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)933 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
934 Handle<JSRegExp> regexp,
935 Handle<RegExpMatchInfo> last_match_array,
936 Handle<JSArray> result_array) {
937 DCHECK(subject->IsFlat());
938 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
939
940 int capture_count = regexp->CaptureCount();
941 int subject_length = subject->length();
942
943 static const int kMinLengthToCache = 0x1000;
944
945 if (subject_length > kMinLengthToCache) {
946 FixedArray* last_match_cache;
947 Object* cached_answer = RegExpResultsCache::Lookup(
948 isolate->heap(), *subject, regexp->data(), &last_match_cache,
949 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
950 if (cached_answer->IsFixedArray()) {
951 int capture_registers = (capture_count + 1) * 2;
952 int32_t* last_match = NewArray<int32_t>(capture_registers);
953 for (int i = 0; i < capture_registers; i++) {
954 last_match[i] = Smi::cast(last_match_cache->get(i))->value();
955 }
956 Handle<FixedArray> cached_fixed_array =
957 Handle<FixedArray>(FixedArray::cast(cached_answer));
958 // The cache FixedArray is a COW-array and we need to return a copy.
959 Handle<FixedArray> copied_fixed_array =
960 isolate->factory()->CopyFixedArrayWithMap(
961 cached_fixed_array, isolate->factory()->fixed_array_map());
962 JSArray::SetContent(result_array, copied_fixed_array);
963 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
964 last_match);
965 DeleteArray(last_match);
966 return *result_array;
967 }
968 }
969
970 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
971 if (global_cache.HasException()) return isolate->heap()->exception();
972
973 // Ensured in Runtime_RegExpExecMultiple.
974 DCHECK(result_array->HasFastObjectElements());
975 Handle<FixedArray> result_elements(
976 FixedArray::cast(result_array->elements()));
977 if (result_elements->length() < 16) {
978 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
979 }
980
981 FixedArrayBuilder builder(result_elements);
982
983 // Position to search from.
984 int match_start = -1;
985 int match_end = 0;
986 bool first = true;
987
988 // Two smis before and after the match, for very long strings.
989 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
990
991 while (true) {
992 int32_t* current_match = global_cache.FetchNext();
993 if (current_match == NULL) break;
994 match_start = current_match[0];
995 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
996 if (match_end < match_start) {
997 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
998 match_start);
999 }
1000 match_end = current_match[1];
1001 {
1002 // Avoid accumulating new handles inside loop.
1003 HandleScope temp_scope(isolate);
1004 Handle<String> match;
1005 if (!first) {
1006 match = isolate->factory()->NewProperSubString(subject, match_start,
1007 match_end);
1008 } else {
1009 match =
1010 isolate->factory()->NewSubString(subject, match_start, match_end);
1011 first = false;
1012 }
1013
1014 if (has_capture) {
1015 // Arguments array to replace function is match, captures, index and
1016 // subject, i.e., 3 + capture count in total.
1017 Handle<FixedArray> elements =
1018 isolate->factory()->NewFixedArray(3 + capture_count);
1019
1020 elements->set(0, *match);
1021 for (int i = 1; i <= capture_count; i++) {
1022 int start = current_match[i * 2];
1023 if (start >= 0) {
1024 int end = current_match[i * 2 + 1];
1025 DCHECK(start <= end);
1026 Handle<String> substring =
1027 isolate->factory()->NewSubString(subject, start, end);
1028 elements->set(i, *substring);
1029 } else {
1030 DCHECK(current_match[i * 2 + 1] < 0);
1031 elements->set(i, isolate->heap()->undefined_value());
1032 }
1033 }
1034 elements->set(capture_count + 1, Smi::FromInt(match_start));
1035 elements->set(capture_count + 2, *subject);
1036 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1037 } else {
1038 builder.Add(*match);
1039 }
1040 }
1041 }
1042
1043 if (global_cache.HasException()) return isolate->heap()->exception();
1044
1045 if (match_start >= 0) {
1046 // Finished matching, with at least one match.
1047 if (match_end < subject_length) {
1048 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1049 subject_length);
1050 }
1051
1052 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1053 global_cache.LastSuccessfulMatch());
1054
1055 if (subject_length > kMinLengthToCache) {
1056 // Store the last successful match into the array for caching.
1057 // TODO(yangguo): do not expose last match to JS and simplify caching.
1058 int capture_registers = (capture_count + 1) * 2;
1059 Handle<FixedArray> last_match_cache =
1060 isolate->factory()->NewFixedArray(capture_registers);
1061 int32_t* last_match = global_cache.LastSuccessfulMatch();
1062 for (int i = 0; i < capture_registers; i++) {
1063 last_match_cache->set(i, Smi::FromInt(last_match[i]));
1064 }
1065 Handle<FixedArray> result_fixed_array = builder.array();
1066 result_fixed_array->Shrink(builder.length());
1067 // Cache the result and copy the FixedArray into a COW array.
1068 Handle<FixedArray> copied_fixed_array =
1069 isolate->factory()->CopyFixedArrayWithMap(
1070 result_fixed_array, isolate->factory()->fixed_array_map());
1071 RegExpResultsCache::Enter(
1072 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1073 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1074 }
1075 return *builder.ToJSArray(result_array);
1076 } else {
1077 return isolate->heap()->null_value(); // No matches at all.
1078 }
1079 }
1080
StringReplaceNonGlobalRegExpWithFunction(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replace_obj)1081 MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
1082 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
1083 Handle<String> replace_obj) {
1084 Factory* factory = isolate->factory();
1085 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1086
1087 const int flags = regexp->GetFlags();
1088
1089 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1090 DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1091
1092 // TODO(jgruber): This should be an easy port to CSA with massive payback.
1093
1094 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1095 uint32_t last_index = 0;
1096 if (sticky) {
1097 Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1098 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1099 Object::ToLength(isolate, last_index_obj),
1100 String);
1101 last_index = PositiveNumberToUint32(*last_index_obj);
1102
1103 if (static_cast<int>(last_index) > subject->length()) last_index = 0;
1104 }
1105
1106 Handle<Object> match_indices_obj;
1107 ASSIGN_RETURN_ON_EXCEPTION(
1108 isolate, match_indices_obj,
1109 RegExpImpl::Exec(regexp, subject, last_index, last_match_info), String);
1110
1111 if (match_indices_obj->IsNull(isolate)) {
1112 if (sticky) regexp->SetLastIndex(0);
1113 return subject;
1114 }
1115
1116 Handle<RegExpMatchInfo> match_indices =
1117 Handle<RegExpMatchInfo>::cast(match_indices_obj);
1118
1119 const int index = match_indices->Capture(0);
1120 const int end_of_match = match_indices->Capture(1);
1121
1122 if (sticky) regexp->SetLastIndex(end_of_match);
1123
1124 IncrementalStringBuilder builder(isolate);
1125 builder.AppendString(factory->NewSubString(subject, 0, index));
1126
1127 // Compute the parameter list consisting of the match, captures, index,
1128 // and subject for the replace function invocation.
1129 // The number of captures plus one for the match.
1130 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1131
1132 const int argc = m + 2;
1133 ScopedVector<Handle<Object>> argv(argc);
1134
1135 for (int j = 0; j < m; j++) {
1136 bool ok;
1137 Handle<String> capture =
1138 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1139 if (ok) {
1140 argv[j] = capture;
1141 } else {
1142 argv[j] = factory->undefined_value();
1143 }
1144 }
1145
1146 argv[argc - 2] = handle(Smi::FromInt(index), isolate);
1147 argv[argc - 1] = subject;
1148
1149 Handle<Object> replacement_obj;
1150 ASSIGN_RETURN_ON_EXCEPTION(
1151 isolate, replacement_obj,
1152 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1153 argv.start()),
1154 String);
1155
1156 Handle<String> replacement;
1157 ASSIGN_RETURN_ON_EXCEPTION(
1158 isolate, replacement, Object::ToString(isolate, replacement_obj), String);
1159
1160 builder.AppendString(replacement);
1161 builder.AppendString(
1162 factory->NewSubString(subject, end_of_match, subject->length()));
1163
1164 return builder.Finish();
1165 }
1166
1167 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1168 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace_obj)1169 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1170 Handle<JSRegExp> regexp,
1171 Handle<String> string,
1172 Handle<String> replace_obj) {
1173 Factory* factory = isolate->factory();
1174
1175 const int flags = regexp->GetFlags();
1176 const bool global = (flags & JSRegExp::kGlobal) != 0;
1177 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1178
1179 // Functional fast-paths are dispatched directly by replace builtin.
1180 DCHECK(!replace_obj->IsCallable());
1181
1182 replace_obj = String::Flatten(replace_obj);
1183
1184 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1185
1186 if (!global) {
1187 // Non-global regexp search, string replace.
1188
1189 uint32_t last_index = 0;
1190 if (sticky) {
1191 Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1192 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1193 Object::ToLength(isolate, last_index_obj),
1194 String);
1195 last_index = PositiveNumberToUint32(*last_index_obj);
1196
1197 if (static_cast<int>(last_index) > string->length()) last_index = 0;
1198 }
1199
1200 Handle<Object> match_indices_obj;
1201 ASSIGN_RETURN_ON_EXCEPTION(
1202 isolate, match_indices_obj,
1203 RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
1204
1205 if (match_indices_obj->IsNull(isolate)) {
1206 if (sticky) regexp->SetLastIndex(0);
1207 return string;
1208 }
1209
1210 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1211
1212 const int start_index = match_indices->Capture(0);
1213 const int end_index = match_indices->Capture(1);
1214
1215 if (sticky) regexp->SetLastIndex(end_index);
1216
1217 IncrementalStringBuilder builder(isolate);
1218 builder.AppendString(factory->NewSubString(string, 0, start_index));
1219
1220 if (replace_obj->length() > 0) {
1221 MatchInfoBackedMatch m(isolate, string, match_indices);
1222 Handle<String> replacement;
1223 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1224 String::GetSubstitution(isolate, &m, replace_obj),
1225 String);
1226 builder.AppendString(replacement);
1227 }
1228
1229 builder.AppendString(
1230 factory->NewSubString(string, end_index, string->length()));
1231 return builder.Finish();
1232 } else {
1233 // Global regexp search, string replace.
1234 DCHECK(global);
1235 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1236 String);
1237
1238 if (replace_obj->length() == 0) {
1239 if (string->HasOnlyOneByteChars()) {
1240 Object* result =
1241 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1242 isolate, string, regexp, last_match_info);
1243 return handle(String::cast(result), isolate);
1244 } else {
1245 Object* result =
1246 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1247 isolate, string, regexp, last_match_info);
1248 return handle(String::cast(result), isolate);
1249 }
1250 }
1251
1252 Object* result = StringReplaceGlobalRegExpWithString(
1253 isolate, string, regexp, replace_obj, last_match_info);
1254 if (result->IsString()) {
1255 return handle(String::cast(result), isolate);
1256 } else {
1257 return MaybeHandle<String>();
1258 }
1259 }
1260
1261 UNREACHABLE();
1262 return MaybeHandle<String>();
1263 }
1264
1265 } // namespace
1266
1267 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1268 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1269 HandleScope handles(isolate);
1270 DCHECK_EQ(4, args.length());
1271
1272 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1273 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1274 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1275 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1276
1277 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1278 CHECK(result_array->HasFastObjectElements());
1279
1280 subject = String::Flatten(subject);
1281 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1282
1283 Object* result;
1284 if (regexp->CaptureCount() == 0) {
1285 result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1286 last_match_info, result_array);
1287 } else {
1288 result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1289 last_match_info, result_array);
1290 }
1291 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1292 return result;
1293 }
1294
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1295 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1296 HandleScope scope(isolate);
1297 DCHECK_EQ(3, args.length());
1298
1299 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1300 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1301 CONVERT_ARG_HANDLE_CHECKED(String, replace, 2);
1302
1303 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1304
1305 RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction(
1306 isolate, subject, regexp, replace));
1307 }
1308
1309 namespace {
1310
1311 // ES##sec-speciesconstructor
1312 // SpeciesConstructor ( O, defaultConstructor )
SpeciesConstructor(Isolate * isolate,Handle<JSReceiver> recv,Handle<JSFunction> default_ctor)1313 MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor(
1314 Isolate* isolate, Handle<JSReceiver> recv,
1315 Handle<JSFunction> default_ctor) {
1316 Handle<Object> ctor_obj;
1317 ASSIGN_RETURN_ON_EXCEPTION(
1318 isolate, ctor_obj,
1319 JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
1320 Object);
1321
1322 if (ctor_obj->IsUndefined(isolate)) return default_ctor;
1323
1324 if (!ctor_obj->IsJSReceiver()) {
1325 THROW_NEW_ERROR(isolate,
1326 NewTypeError(MessageTemplate::kConstructorNotReceiver),
1327 Object);
1328 }
1329
1330 Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj);
1331
1332 Handle<Object> species;
1333 ASSIGN_RETURN_ON_EXCEPTION(
1334 isolate, species,
1335 JSObject::GetProperty(ctor, isolate->factory()->species_symbol()),
1336 Object);
1337
1338 if (species->IsNullOrUndefined(isolate)) {
1339 return default_ctor;
1340 }
1341
1342 if (species->IsConstructor()) return species;
1343
1344 THROW_NEW_ERROR(
1345 isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
1346 }
1347
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1348 MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1349 Handle<Object> object,
1350 uint32_t* out) {
1351 if (object->IsUndefined(isolate)) {
1352 *out = kMaxUInt32;
1353 return object;
1354 }
1355
1356 Handle<Object> number;
1357 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1358 *out = NumberToUint32(*number);
1359 return object;
1360 }
1361
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1362 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1363 Handle<FixedArray> elems,
1364 int num_elems) {
1365 elems->Shrink(num_elems);
1366 return isolate->factory()->NewJSArrayWithElements(elems);
1367 }
1368
1369 } // namespace
1370
1371 // Slow path for:
1372 // ES#sec-regexp.prototype-@@replace
1373 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1374 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1375 HandleScope scope(isolate);
1376 DCHECK_EQ(3, args.length());
1377
1378 DCHECK(args[1]->IsString());
1379
1380 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1381 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1382 CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1383
1384 Factory* factory = isolate->factory();
1385
1386 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1387 Handle<Object> ctor;
1388 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1389 isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
1390
1391 Handle<Object> flags_obj;
1392 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1393 isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1394
1395 Handle<String> flags;
1396 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1397 Object::ToString(isolate, flags_obj));
1398
1399 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1400 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1401
1402 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1403 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1404
1405 Handle<String> new_flags = flags;
1406 if (!sticky) {
1407 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1408 factory->NewConsString(flags, y_str));
1409 }
1410
1411 Handle<JSReceiver> splitter;
1412 {
1413 const int argc = 2;
1414
1415 ScopedVector<Handle<Object>> argv(argc);
1416 argv[0] = recv;
1417 argv[1] = new_flags;
1418
1419 Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
1420 Handle<Object> splitter_obj;
1421 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1422 isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
1423
1424 splitter = Handle<JSReceiver>::cast(splitter_obj);
1425 }
1426
1427 uint32_t limit;
1428 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1429
1430 const uint32_t length = string->length();
1431
1432 if (limit == 0) return *factory->NewJSArray(0);
1433
1434 if (length == 0) {
1435 Handle<Object> result;
1436 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1437 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1438 factory->undefined_value()));
1439
1440 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1441
1442 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1443 elems->set(0, *string);
1444 return *factory->NewJSArrayWithElements(elems);
1445 }
1446
1447 static const int kInitialArraySize = 8;
1448 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1449 int num_elems = 0;
1450
1451 uint32_t string_index = 0;
1452 uint32_t prev_string_index = 0;
1453 while (string_index < length) {
1454 RETURN_FAILURE_ON_EXCEPTION(
1455 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1456
1457 Handle<Object> result;
1458 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1459 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1460 factory->undefined_value()));
1461
1462 if (result->IsNull(isolate)) {
1463 string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1464 string_index, unicode);
1465 continue;
1466 }
1467
1468 Handle<Object> last_index_obj;
1469 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1470 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1471
1472 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1473 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1474
1475 const uint32_t end =
1476 std::min(PositiveNumberToUint32(*last_index_obj), length);
1477 if (end == prev_string_index) {
1478 string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1479 string_index, unicode);
1480 continue;
1481 }
1482
1483 {
1484 Handle<String> substr =
1485 factory->NewSubString(string, prev_string_index, string_index);
1486 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1487 if (static_cast<uint32_t>(num_elems) == limit) {
1488 return *NewJSArrayWithElements(isolate, elems, num_elems);
1489 }
1490 }
1491
1492 prev_string_index = end;
1493
1494 Handle<Object> num_captures_obj;
1495 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1496 isolate, num_captures_obj,
1497 Object::GetProperty(result, isolate->factory()->length_string()));
1498
1499 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1500 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1501 const int num_captures = PositiveNumberToUint32(*num_captures_obj);
1502
1503 for (int i = 1; i < num_captures; i++) {
1504 Handle<Object> capture;
1505 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1506 isolate, capture, Object::GetElement(isolate, result, i));
1507 elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1508 if (static_cast<uint32_t>(num_elems) == limit) {
1509 return *NewJSArrayWithElements(isolate, elems, num_elems);
1510 }
1511 }
1512
1513 string_index = prev_string_index;
1514 }
1515
1516 {
1517 Handle<String> substr =
1518 factory->NewSubString(string, prev_string_index, length);
1519 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1520 }
1521
1522 return *NewJSArrayWithElements(isolate, elems, num_elems);
1523 }
1524
1525 // Slow path for:
1526 // ES#sec-regexp.prototype-@@replace
1527 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplace)1528 RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1529 HandleScope scope(isolate);
1530 DCHECK_EQ(3, args.length());
1531
1532 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1533 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1534 Handle<Object> replace_obj = args.at(2);
1535
1536 Factory* factory = isolate->factory();
1537
1538 string = String::Flatten(string);
1539
1540 const bool functional_replace = replace_obj->IsCallable();
1541
1542 Handle<String> replace;
1543 if (!functional_replace) {
1544 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1545 Object::ToString(isolate, replace_obj));
1546 }
1547
1548 // Fast-path for unmodified JSRegExps (and non-functional replace).
1549 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1550 // We should never get here with functional replace because unmodified
1551 // regexp and functional replace should be fully handled in CSA code.
1552 CHECK(!functional_replace);
1553 Handle<Object> result;
1554 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1555 isolate, result,
1556 RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1557 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1558 return *result;
1559 }
1560
1561 const uint32_t length = string->length();
1562
1563 Handle<Object> global_obj;
1564 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1565 isolate, global_obj,
1566 JSReceiver::GetProperty(recv, factory->global_string()));
1567 const bool global = global_obj->BooleanValue();
1568
1569 bool unicode = false;
1570 if (global) {
1571 Handle<Object> unicode_obj;
1572 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1573 isolate, unicode_obj,
1574 JSReceiver::GetProperty(recv, factory->unicode_string()));
1575 unicode = unicode_obj->BooleanValue();
1576
1577 RETURN_FAILURE_ON_EXCEPTION(isolate,
1578 RegExpUtils::SetLastIndex(isolate, recv, 0));
1579 }
1580
1581 Zone zone(isolate->allocator(), ZONE_NAME);
1582 ZoneVector<Handle<Object>> results(&zone);
1583
1584 while (true) {
1585 Handle<Object> result;
1586 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1587 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1588 factory->undefined_value()));
1589
1590 if (result->IsNull(isolate)) break;
1591
1592 results.push_back(result);
1593 if (!global) break;
1594
1595 Handle<Object> match_obj;
1596 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1597 Object::GetElement(isolate, result, 0));
1598
1599 Handle<String> match;
1600 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1601 Object::ToString(isolate, match_obj));
1602
1603 if (match->length() == 0) {
1604 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1605 isolate, recv, string, unicode));
1606 }
1607 }
1608
1609 // TODO(jgruber): Look into ReplacementStringBuilder instead.
1610 IncrementalStringBuilder builder(isolate);
1611 uint32_t next_source_position = 0;
1612
1613 for (const auto& result : results) {
1614 Handle<Object> captures_length_obj;
1615 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1616 isolate, captures_length_obj,
1617 Object::GetProperty(result, factory->length_string()));
1618
1619 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620 isolate, captures_length_obj,
1621 Object::ToLength(isolate, captures_length_obj));
1622 const int captures_length = PositiveNumberToUint32(*captures_length_obj);
1623
1624 Handle<Object> match_obj;
1625 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1626 Object::GetElement(isolate, result, 0));
1627
1628 Handle<String> match;
1629 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1630 Object::ToString(isolate, match_obj));
1631
1632 const int match_length = match->length();
1633
1634 Handle<Object> position_obj;
1635 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1636 isolate, position_obj,
1637 Object::GetProperty(result, factory->index_string()));
1638
1639 // TODO(jgruber): Extract and correct error handling. Since we can go up to
1640 // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
1641 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1642 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1643 const uint32_t position =
1644 std::min(PositiveNumberToUint32(*position_obj), length);
1645
1646 ZoneVector<Handle<Object>> captures(&zone);
1647 for (int n = 0; n < captures_length; n++) {
1648 Handle<Object> capture;
1649 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1650 isolate, capture, Object::GetElement(isolate, result, n));
1651
1652 if (!capture->IsUndefined(isolate)) {
1653 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1654 Object::ToString(isolate, capture));
1655 }
1656 captures.push_back(capture);
1657 }
1658
1659 Handle<String> replacement;
1660 if (functional_replace) {
1661 const int argc = captures_length + 2;
1662 ScopedVector<Handle<Object>> argv(argc);
1663
1664 for (int j = 0; j < captures_length; j++) {
1665 argv[j] = captures[j];
1666 }
1667
1668 argv[captures_length] = handle(Smi::FromInt(position), isolate);
1669 argv[captures_length + 1] = string;
1670
1671 Handle<Object> replacement_obj;
1672 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1673 isolate, replacement_obj,
1674 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1675 argc, argv.start()));
1676
1677 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1678 isolate, replacement, Object::ToString(isolate, replacement_obj));
1679 } else {
1680 VectorBackedMatch m(isolate, string, match, position, &captures);
1681 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1682 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1683 }
1684
1685 if (position >= next_source_position) {
1686 builder.AppendString(
1687 factory->NewSubString(string, next_source_position, position));
1688 builder.AppendString(replacement);
1689
1690 next_source_position = position + match_length;
1691 }
1692 }
1693
1694 if (next_source_position < length) {
1695 builder.AppendString(
1696 factory->NewSubString(string, next_source_position, length));
1697 }
1698
1699 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1700 }
1701
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow)1702 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1703 SealHandleScope shs(isolate);
1704 DCHECK_EQ(4, args.length());
1705 Object* exception = isolate->pending_exception();
1706 isolate->clear_pending_exception();
1707 return isolate->ReThrow(exception);
1708 }
1709
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1710 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1711 HandleScope scope(isolate);
1712 DCHECK_EQ(3, args.length());
1713 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1714 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1715 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1716
1717 RETURN_FAILURE_ON_EXCEPTION(isolate,
1718 JSRegExp::Initialize(regexp, source, flags));
1719
1720 return *regexp;
1721 }
1722
RUNTIME_FUNCTION(Runtime_IsRegExp)1723 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1724 SealHandleScope shs(isolate);
1725 DCHECK_EQ(1, args.length());
1726 CONVERT_ARG_CHECKED(Object, obj, 0);
1727 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1728 }
1729
1730 } // namespace internal
1731 } // namespace v8
1732