• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/objects/js-regexp.h"
6 
7 #include "src/objects/js-array-inl.h"
8 #include "src/objects/js-regexp-inl.h"
9 #include "src/regexp/regexp.h"
10 
11 namespace v8 {
12 namespace internal {
13 
GetAndCacheIndices(Isolate * isolate,Handle<JSRegExpResult> regexp_result)14 MaybeHandle<JSArray> JSRegExpResult::GetAndCacheIndices(
15     Isolate* isolate, Handle<JSRegExpResult> regexp_result) {
16   // Check for cached indices. We do a slow lookup and set of
17   // the cached_indices_or_match_info and names fields just in
18   // case they have been migrated to dictionaries.
19   Handle<Object> indices_or_regexp(
20       GetProperty(
21           isolate, regexp_result,
22           isolate->factory()->regexp_result_cached_indices_or_regexp_symbol())
23           .ToHandleChecked());
24   if (indices_or_regexp->IsJSRegExp()) {
25     // Build and cache indices for next lookup.
26     // TODO(joshualitt): Instead of caching the indices, we could call
27     // ReconfigureToDataProperty on 'indices' setting its value to this
28     // newly created array. However, care would have to be taken to ensure
29     // a new map is not created each time.
30 
31     // Grab regexp, its last_index, and the original subject string from the
32     // result and the re-execute the regexp to generate a new MatchInfo.
33     Handle<JSRegExp> regexp(JSRegExp::cast(*indices_or_regexp), isolate);
34     Handle<Object> input_object(
35         GetProperty(isolate, regexp_result,
36                     isolate->factory()->regexp_result_regexp_input_symbol())
37             .ToHandleChecked());
38     Handle<String> subject(String::cast(*input_object), isolate);
39     Handle<Object> last_index_object(
40         GetProperty(
41             isolate, regexp_result,
42             isolate->factory()->regexp_result_regexp_last_index_symbol())
43             .ToHandleChecked());
44 
45     int capture_count = regexp->CaptureCount();
46     Handle<RegExpMatchInfo> match_info =
47         RegExpMatchInfo::New(isolate, capture_count);
48 
49     int last_index = Smi::ToInt(*last_index_object);
50     Handle<Object> result;
51     ASSIGN_RETURN_ON_EXCEPTION(
52         isolate, result,
53         RegExp::Exec(isolate, regexp, subject, last_index, match_info),
54         JSArray);
55     DCHECK_EQ(*result, *match_info);
56 
57     Handle<Object> maybe_names(
58         GetProperty(isolate, regexp_result,
59                     isolate->factory()->regexp_result_names_symbol())
60             .ToHandleChecked());
61     indices_or_regexp =
62         JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
63 
64     // Cache the result and clear the names array, last_index and subject.
65     SetProperty(
66         isolate, regexp_result,
67         isolate->factory()->regexp_result_cached_indices_or_regexp_symbol(),
68         indices_or_regexp)
69         .ToHandleChecked();
70     SetProperty(isolate, regexp_result,
71                 isolate->factory()->regexp_result_names_symbol(),
72                 isolate->factory()->undefined_value())
73         .ToHandleChecked();
74     SetProperty(isolate, regexp_result,
75                 isolate->factory()->regexp_result_regexp_last_index_symbol(),
76                 isolate->factory()->undefined_value())
77         .ToHandleChecked();
78     SetProperty(isolate, regexp_result,
79                 isolate->factory()->regexp_result_regexp_input_symbol(),
80                 isolate->factory()->undefined_value())
81         .ToHandleChecked();
82   }
83   return Handle<JSArray>::cast(indices_or_regexp);
84 }
85 
BuildIndices(Isolate * isolate,Handle<RegExpMatchInfo> match_info,Handle<Object> maybe_names)86 Handle<JSRegExpResultIndices> JSRegExpResultIndices::BuildIndices(
87     Isolate* isolate, Handle<RegExpMatchInfo> match_info,
88     Handle<Object> maybe_names) {
89   Handle<JSRegExpResultIndices> indices(Handle<JSRegExpResultIndices>::cast(
90       isolate->factory()->NewJSObjectFromMap(
91           isolate->regexp_result_indices_map())));
92 
93   // Initialize indices length to avoid having a partially initialized object
94   // should GC be triggered by creating a NewFixedArray.
95   indices->set_length(Smi::zero());
96 
97   // Build indices array from RegExpMatchInfo.
98   int num_indices = match_info->NumberOfCaptureRegisters();
99   int num_results = num_indices >> 1;
100   Handle<FixedArray> indices_array =
101       isolate->factory()->NewFixedArray(num_results);
102   JSArray::SetContent(indices, indices_array);
103 
104   for (int i = 0; i < num_results; i++) {
105     int base_offset = i * 2;
106     int start_offset = match_info->Capture(base_offset);
107     int end_offset = match_info->Capture(base_offset + 1);
108 
109     // Any unmatched captures are set to undefined, otherwise we set them to a
110     // subarray of the indices.
111     if (start_offset == -1) {
112       indices_array->set(i, ReadOnlyRoots(isolate).undefined_value());
113     } else {
114       Handle<FixedArray> indices_sub_array(
115           isolate->factory()->NewFixedArray(2));
116       indices_sub_array->set(0, Smi::FromInt(start_offset));
117       indices_sub_array->set(1, Smi::FromInt(end_offset));
118       Handle<JSArray> indices_sub_jsarray =
119           isolate->factory()->NewJSArrayWithElements(indices_sub_array,
120                                                      PACKED_SMI_ELEMENTS, 2);
121       indices_array->set(i, *indices_sub_jsarray);
122     }
123   }
124 
125   // If there are no capture groups, set the groups property to undefined.
126   FieldIndex groups_index = FieldIndex::ForDescriptor(
127       indices->map(), InternalIndex(kGroupsDescriptorIndex));
128   if (maybe_names->IsUndefined(isolate)) {
129     indices->RawFastPropertyAtPut(groups_index,
130                                   ReadOnlyRoots(isolate).undefined_value());
131     return indices;
132   }
133 
134   // Create a groups property which returns a dictionary of named captures to
135   // their corresponding capture indices.
136   Handle<FixedArray> names(Handle<FixedArray>::cast(maybe_names));
137   int num_names = names->length() >> 1;
138   Handle<NameDictionary> group_names = NameDictionary::New(isolate, num_names);
139   for (int i = 0; i < num_names; i++) {
140     int base_offset = i * 2;
141     int name_offset = base_offset;
142     int index_offset = base_offset + 1;
143     Handle<String> name(String::cast(names->get(name_offset)), isolate);
144     Handle<Smi> smi_index(Smi::cast(names->get(index_offset)), isolate);
145     Handle<Object> capture_indices(indices_array->get(smi_index->value()),
146                                    isolate);
147     if (!capture_indices->IsUndefined(isolate)) {
148       capture_indices = Handle<JSArray>::cast(capture_indices);
149     }
150     group_names = NameDictionary::Add(
151         isolate, group_names, name, capture_indices, PropertyDetails::Empty());
152   }
153 
154   // Convert group_names to a JSObject and store at the groups property of the
155   // result indices.
156   Handle<FixedArrayBase> elements = isolate->factory()->empty_fixed_array();
157   Handle<HeapObject> null =
158       Handle<HeapObject>::cast(isolate->factory()->null_value());
159   Handle<JSObject> js_group_names =
160       isolate->factory()->NewSlowJSObjectWithPropertiesAndElements(
161           null, group_names, elements);
162   indices->RawFastPropertyAtPut(groups_index, *js_group_names);
163   return indices;
164 }
165 
BacktrackLimit() const166 uint32_t JSRegExp::BacktrackLimit() const {
167   CHECK_EQ(TypeTag(), IRREGEXP);
168   return static_cast<uint32_t>(Smi::ToInt(DataAt(kIrregexpBacktrackLimit)));
169 }
170 
171 // static
FlagsFromString(Isolate * isolate,Handle<String> flags,bool * success)172 JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate,
173                                           Handle<String> flags, bool* success) {
174   int length = flags->length();
175   if (length == 0) {
176     *success = true;
177     return JSRegExp::kNone;
178   }
179   // A longer flags string cannot be valid.
180   if (length > JSRegExp::kFlagCount) return JSRegExp::Flags(0);
181   JSRegExp::Flags value(0);
182   if (flags->IsSeqOneByteString()) {
183     DisallowHeapAllocation no_gc;
184     SeqOneByteString seq_flags = SeqOneByteString::cast(*flags);
185     for (int i = 0; i < length; i++) {
186       base::Optional<JSRegExp::Flag> maybe_flag =
187           JSRegExp::FlagFromChar(seq_flags.Get(i));
188       if (!maybe_flag.has_value()) return JSRegExp::Flags(0);
189       JSRegExp::Flag flag = *maybe_flag;
190       // Duplicate flag.
191       if (value & flag) return JSRegExp::Flags(0);
192       value |= flag;
193     }
194   } else {
195     flags = String::Flatten(isolate, flags);
196     DisallowHeapAllocation no_gc;
197     String::FlatContent flags_content = flags->GetFlatContent(no_gc);
198     for (int i = 0; i < length; i++) {
199       base::Optional<JSRegExp::Flag> maybe_flag =
200           JSRegExp::FlagFromChar(flags_content.Get(i));
201       if (!maybe_flag.has_value()) return JSRegExp::Flags(0);
202       JSRegExp::Flag flag = *maybe_flag;
203       // Duplicate flag.
204       if (value & flag) return JSRegExp::Flags(0);
205       value |= flag;
206     }
207   }
208   *success = true;
209   return value;
210 }
211 
212 // static
New(Isolate * isolate,Handle<String> pattern,Flags flags,uint32_t backtrack_limit)213 MaybeHandle<JSRegExp> JSRegExp::New(Isolate* isolate, Handle<String> pattern,
214                                     Flags flags, uint32_t backtrack_limit) {
215   Handle<JSFunction> constructor = isolate->regexp_function();
216   Handle<JSRegExp> regexp =
217       Handle<JSRegExp>::cast(isolate->factory()->NewJSObject(constructor));
218 
219   return JSRegExp::Initialize(regexp, pattern, flags, backtrack_limit);
220 }
221 
222 // static
Copy(Handle<JSRegExp> regexp)223 Handle<JSRegExp> JSRegExp::Copy(Handle<JSRegExp> regexp) {
224   Isolate* const isolate = regexp->GetIsolate();
225   return Handle<JSRegExp>::cast(isolate->factory()->CopyJSObject(regexp));
226 }
227 
Code(bool is_latin1) const228 Object JSRegExp::Code(bool is_latin1) const {
229   DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
230   return DataAt(code_index(is_latin1));
231 }
232 
Bytecode(bool is_latin1) const233 Object JSRegExp::Bytecode(bool is_latin1) const {
234   DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
235   return DataAt(bytecode_index(is_latin1));
236 }
237 
ShouldProduceBytecode()238 bool JSRegExp::ShouldProduceBytecode() {
239   return FLAG_regexp_interpret_all ||
240          (FLAG_regexp_tier_up && !MarkedForTierUp());
241 }
242 
243 // Only irregexps are subject to tier-up.
CanTierUp()244 bool JSRegExp::CanTierUp() {
245   return FLAG_regexp_tier_up && TypeTag() == JSRegExp::IRREGEXP;
246 }
247 
248 // An irregexp is considered to be marked for tier up if the tier-up ticks
249 // value reaches zero.
MarkedForTierUp()250 bool JSRegExp::MarkedForTierUp() {
251   DCHECK(data().IsFixedArray());
252 
253   if (!CanTierUp()) {
254     return false;
255   }
256 
257   return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0;
258 }
259 
ResetLastTierUpTick()260 void JSRegExp::ResetLastTierUpTick() {
261   DCHECK(FLAG_regexp_tier_up);
262   DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
263   int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) + 1;
264   FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
265                                Smi::FromInt(tier_up_ticks));
266 }
267 
TierUpTick()268 void JSRegExp::TierUpTick() {
269   DCHECK(FLAG_regexp_tier_up);
270   DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
271   int tier_up_ticks = Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex));
272   if (tier_up_ticks == 0) {
273     return;
274   }
275   FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
276                                Smi::FromInt(tier_up_ticks - 1));
277 }
278 
MarkTierUpForNextExec()279 void JSRegExp::MarkTierUpForNextExec() {
280   DCHECK(FLAG_regexp_tier_up);
281   DCHECK_EQ(TypeTag(), JSRegExp::IRREGEXP);
282   FixedArray::cast(data()).set(JSRegExp::kIrregexpTicksUntilTierUpIndex,
283                                Smi::zero());
284 }
285 
286 // static
Initialize(Handle<JSRegExp> regexp,Handle<String> source,Handle<String> flags_string)287 MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
288                                            Handle<String> source,
289                                            Handle<String> flags_string) {
290   Isolate* isolate = regexp->GetIsolate();
291   bool success = false;
292   Flags flags = JSRegExp::FlagsFromString(isolate, flags_string, &success);
293   if (!success) {
294     THROW_NEW_ERROR(
295         isolate,
296         NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
297         JSRegExp);
298   }
299   return Initialize(regexp, source, flags);
300 }
301 
302 namespace {
303 
IsLineTerminator(int c)304 bool IsLineTerminator(int c) {
305   // Expected to return true for '\n', '\r', 0x2028, and 0x2029.
306   return unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c));
307 }
308 
309 // TODO(jgruber): Consider merging CountAdditionalEscapeChars and
310 // WriteEscapedRegExpSource into a single function to deduplicate dispatch logic
311 // and move related code closer to each other.
312 template <typename Char>
CountAdditionalEscapeChars(Handle<String> source,bool * needs_escapes_out)313 int CountAdditionalEscapeChars(Handle<String> source, bool* needs_escapes_out) {
314   DisallowHeapAllocation no_gc;
315   int escapes = 0;
316   bool needs_escapes = false;
317   bool in_char_class = false;
318   Vector<const Char> src = source->GetCharVector<Char>(no_gc);
319   for (int i = 0; i < src.length(); i++) {
320     const Char c = src[i];
321     if (c == '\\') {
322       if (i + 1 < src.length() && IsLineTerminator(src[i + 1])) {
323         // This '\' is ignored since the next character itself will be escaped.
324         escapes--;
325       } else {
326         // Escape. Skip next character, which will be copied verbatim;
327         i++;
328       }
329     } else if (c == '/' && !in_char_class) {
330       // Not escaped forward-slash needs escape.
331       needs_escapes = true;
332       escapes++;
333     } else if (c == '[') {
334       in_char_class = true;
335     } else if (c == ']') {
336       in_char_class = false;
337     } else if (c == '\n') {
338       needs_escapes = true;
339       escapes++;
340     } else if (c == '\r') {
341       needs_escapes = true;
342       escapes++;
343     } else if (static_cast<int>(c) == 0x2028) {
344       needs_escapes = true;
345       escapes += std::strlen("\\u2028") - 1;
346     } else if (static_cast<int>(c) == 0x2029) {
347       needs_escapes = true;
348       escapes += std::strlen("\\u2029") - 1;
349     } else {
350       DCHECK(!IsLineTerminator(c));
351     }
352   }
353   DCHECK(!in_char_class);
354   DCHECK_GE(escapes, 0);
355   DCHECK_IMPLIES(escapes != 0, needs_escapes);
356   *needs_escapes_out = needs_escapes;
357   return escapes;
358 }
359 
360 template <typename Char>
WriteStringToCharVector(Vector<Char> v,int * d,const char * string)361 void WriteStringToCharVector(Vector<Char> v, int* d, const char* string) {
362   int s = 0;
363   while (string[s] != '\0') v[(*d)++] = string[s++];
364 }
365 
366 template <typename Char, typename StringType>
WriteEscapedRegExpSource(Handle<String> source,Handle<StringType> result)367 Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
368                                             Handle<StringType> result) {
369   DisallowHeapAllocation no_gc;
370   Vector<const Char> src = source->GetCharVector<Char>(no_gc);
371   Vector<Char> dst(result->GetChars(no_gc), result->length());
372   int s = 0;
373   int d = 0;
374   bool in_char_class = false;
375   while (s < src.length()) {
376     const Char c = src[s];
377     if (c == '\\') {
378       if (s + 1 < src.length() && IsLineTerminator(src[s + 1])) {
379         // This '\' is ignored since the next character itself will be escaped.
380         s++;
381         continue;
382       } else {
383         // Escape. Copy this and next character.
384         dst[d++] = src[s++];
385       }
386       if (s == src.length()) break;
387     } else if (c == '/' && !in_char_class) {
388       // Not escaped forward-slash needs escape.
389       dst[d++] = '\\';
390     } else if (c == '[') {
391       in_char_class = true;
392     } else if (c == ']') {
393       in_char_class = false;
394     } else if (c == '\n') {
395       WriteStringToCharVector(dst, &d, "\\n");
396       s++;
397       continue;
398     } else if (c == '\r') {
399       WriteStringToCharVector(dst, &d, "\\r");
400       s++;
401       continue;
402     } else if (static_cast<int>(c) == 0x2028) {
403       WriteStringToCharVector(dst, &d, "\\u2028");
404       s++;
405       continue;
406     } else if (static_cast<int>(c) == 0x2029) {
407       WriteStringToCharVector(dst, &d, "\\u2029");
408       s++;
409       continue;
410     } else {
411       DCHECK(!IsLineTerminator(c));
412     }
413     dst[d++] = src[s++];
414   }
415   DCHECK_EQ(result->length(), d);
416   DCHECK(!in_char_class);
417   return result;
418 }
419 
EscapeRegExpSource(Isolate * isolate,Handle<String> source)420 MaybeHandle<String> EscapeRegExpSource(Isolate* isolate,
421                                        Handle<String> source) {
422   DCHECK(source->IsFlat());
423   if (source->length() == 0) return isolate->factory()->query_colon_string();
424   bool one_byte = String::IsOneByteRepresentationUnderneath(*source);
425   bool needs_escapes = false;
426   int additional_escape_chars =
427       one_byte ? CountAdditionalEscapeChars<uint8_t>(source, &needs_escapes)
428                : CountAdditionalEscapeChars<uc16>(source, &needs_escapes);
429   if (!needs_escapes) return source;
430   int length = source->length() + additional_escape_chars;
431   if (one_byte) {
432     Handle<SeqOneByteString> result;
433     ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
434                                isolate->factory()->NewRawOneByteString(length),
435                                String);
436     return WriteEscapedRegExpSource<uint8_t>(source, result);
437   } else {
438     Handle<SeqTwoByteString> result;
439     ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
440                                isolate->factory()->NewRawTwoByteString(length),
441                                String);
442     return WriteEscapedRegExpSource<uc16>(source, result);
443   }
444 }
445 
446 }  // namespace
447 
448 // static
Initialize(Handle<JSRegExp> regexp,Handle<String> source,Flags flags,uint32_t backtrack_limit)449 MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
450                                            Handle<String> source, Flags flags,
451                                            uint32_t backtrack_limit) {
452   Isolate* isolate = regexp->GetIsolate();
453   Factory* factory = isolate->factory();
454   // If source is the empty string we set it to "(?:)" instead as
455   // suggested by ECMA-262, 5th, section 15.10.4.1.
456   if (source->length() == 0) source = factory->query_colon_string();
457 
458   source = String::Flatten(isolate, source);
459 
460   RETURN_ON_EXCEPTION(
461       isolate, RegExp::Compile(isolate, regexp, source, flags, backtrack_limit),
462       JSRegExp);
463 
464   Handle<String> escaped_source;
465   ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source,
466                              EscapeRegExpSource(isolate, source), JSRegExp);
467 
468   regexp->set_source(*escaped_source);
469   regexp->set_flags(Smi::FromInt(flags));
470 
471   Map map = regexp->map();
472   Object constructor = map.GetConstructor();
473   if (constructor.IsJSFunction() &&
474       JSFunction::cast(constructor).initial_map() == map) {
475     // If we still have the original map, set in-object properties directly.
476     regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex, Smi::zero(),
477                                   SKIP_WRITE_BARRIER);
478   } else {
479     // Map has changed, so use generic, but slower, method.
480     RETURN_ON_EXCEPTION(
481         isolate,
482         Object::SetProperty(isolate, regexp, factory->lastIndex_string(),
483                             Handle<Smi>(Smi::zero(), isolate)),
484         JSRegExp);
485   }
486 
487   return regexp;
488 }
489 
490 }  // namespace internal
491 }  // namespace v8
492