1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/regexp/regexp-utils.h"
6
7 #include "src/execution/isolate.h"
8 #include "src/execution/protectors-inl.h"
9 #include "src/heap/factory.h"
10 #include "src/objects/js-regexp-inl.h"
11 #include "src/objects/objects-inl.h"
12 #include "src/regexp/regexp.h"
13
14 namespace v8 {
15 namespace internal {
16
GenericCaptureGetter(Isolate * isolate,Handle<RegExpMatchInfo> match_info,int capture,bool * ok)17 Handle<String> RegExpUtils::GenericCaptureGetter(
18 Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
19 bool* ok) {
20 const int index = capture * 2;
21 if (index >= match_info->NumberOfCaptureRegisters()) {
22 if (ok != nullptr) *ok = false;
23 return isolate->factory()->empty_string();
24 }
25
26 const int match_start = match_info->Capture(index);
27 const int match_end = match_info->Capture(index + 1);
28 if (match_start == -1 || match_end == -1) {
29 if (ok != nullptr) *ok = false;
30 return isolate->factory()->empty_string();
31 }
32
33 if (ok != nullptr) *ok = true;
34 Handle<String> last_subject(match_info->LastSubject(), isolate);
35 return isolate->factory()->NewSubString(last_subject, match_start, match_end);
36 }
37
38 namespace {
39
HasInitialRegExpMap(Isolate * isolate,JSReceiver recv)40 V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, JSReceiver recv) {
41 return recv.map() == isolate->regexp_function()->initial_map();
42 }
43
44 } // namespace
45
SetLastIndex(Isolate * isolate,Handle<JSReceiver> recv,uint64_t value)46 MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
47 Handle<JSReceiver> recv,
48 uint64_t value) {
49 Handle<Object> value_as_object =
50 isolate->factory()->NewNumberFromInt64(value);
51 if (HasInitialRegExpMap(isolate, *recv)) {
52 JSRegExp::cast(*recv).set_last_index(*value_as_object,
53 UPDATE_WRITE_BARRIER);
54 return recv;
55 } else {
56 return Object::SetProperty(
57 isolate, recv, isolate->factory()->lastIndex_string(), value_as_object,
58 StoreOrigin::kMaybeKeyed, Just(kThrowOnError));
59 }
60 }
61
GetLastIndex(Isolate * isolate,Handle<JSReceiver> recv)62 MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
63 Handle<JSReceiver> recv) {
64 if (HasInitialRegExpMap(isolate, *recv)) {
65 return handle(JSRegExp::cast(*recv).last_index(), isolate);
66 } else {
67 return Object::GetProperty(isolate, recv,
68 isolate->factory()->lastIndex_string());
69 }
70 }
71
72 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
73 // Also takes an optional exec method in case our caller
74 // has already fetched exec.
RegExpExec(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,Handle<Object> exec)75 MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
76 Handle<JSReceiver> regexp,
77 Handle<String> string,
78 Handle<Object> exec) {
79 if (exec->IsUndefined(isolate)) {
80 ASSIGN_RETURN_ON_EXCEPTION(
81 isolate, exec,
82 Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
83 Object);
84 }
85
86 if (exec->IsCallable()) {
87 const int argc = 1;
88 base::ScopedVector<Handle<Object>> argv(argc);
89 argv[0] = string;
90
91 Handle<Object> result;
92 ASSIGN_RETURN_ON_EXCEPTION(
93 isolate, result,
94 Execution::Call(isolate, exec, regexp, argc, argv.begin()), Object);
95
96 if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
97 THROW_NEW_ERROR(isolate,
98 NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
99 Object);
100 }
101 return result;
102 }
103
104 if (!regexp->IsJSRegExp()) {
105 THROW_NEW_ERROR(isolate,
106 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
107 isolate->factory()->NewStringFromAsciiChecked(
108 "RegExp.prototype.exec"),
109 regexp),
110 Object);
111 }
112
113 {
114 Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
115
116 const int argc = 1;
117 base::ScopedVector<Handle<Object>> argv(argc);
118 argv[0] = string;
119
120 return Execution::Call(isolate, regexp_exec, regexp, argc, argv.begin());
121 }
122 }
123
IsUnmodifiedRegExp(Isolate * isolate,Handle<Object> obj)124 bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
125 #ifdef V8_ENABLE_FORCE_SLOW_PATH
126 if (isolate->force_slow_path()) return false;
127 #endif
128
129 if (!obj->IsJSReceiver()) return false;
130
131 JSReceiver recv = JSReceiver::cast(*obj);
132
133 if (!HasInitialRegExpMap(isolate, recv)) return false;
134
135 // Check the receiver's prototype's map.
136 Object proto = recv.map().prototype();
137 if (!proto.IsJSReceiver()) return false;
138
139 Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
140 Map proto_map = JSReceiver::cast(proto).map();
141 if (proto_map != *initial_proto_initial_map) {
142 return false;
143 }
144
145 // Check that the "exec" method is unmodified.
146 // Check that the index refers to "exec" method (this has to be consistent
147 // with the init order in the bootstrapper).
148 InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex);
149 DCHECK_EQ(*(isolate->factory()->exec_string()),
150 proto_map.instance_descriptors(isolate).GetKey(kExecIndex));
151 if (proto_map.instance_descriptors(isolate)
152 .GetDetails(kExecIndex)
153 .constness() != PropertyConstness::kConst) {
154 return false;
155 }
156
157 // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this
158 // does not go on to check the actual value of the exec property. This would
159 // not be valid since this method is called from places that access the flags
160 // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this
161 // case.
162
163 if (!Protectors::IsRegExpSpeciesLookupChainIntact(isolate)) return false;
164
165 // The smi check is required to omit ToLength(lastIndex) calls with possible
166 // user-code execution on the fast path.
167 Object last_index = JSRegExp::cast(recv).last_index();
168 return last_index.IsSmi() && Smi::ToInt(last_index) >= 0;
169 }
170
AdvanceStringIndex(Handle<String> string,uint64_t index,bool unicode)171 uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
172 bool unicode) {
173 DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
174 const uint64_t string_length = static_cast<uint64_t>(string->length());
175 if (unicode && index < string_length) {
176 const uint16_t first = string->Get(static_cast<uint32_t>(index));
177 if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
178 DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
179 const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
180 if (second >= 0xDC00 && second <= 0xDFFF) {
181 return index + 2;
182 }
183 }
184 }
185
186 return index + 1;
187 }
188
SetAdvancedStringIndex(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,bool unicode)189 MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
190 Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
191 bool unicode) {
192 Handle<Object> last_index_obj;
193 ASSIGN_RETURN_ON_EXCEPTION(
194 isolate, last_index_obj,
195 Object::GetProperty(isolate, regexp,
196 isolate->factory()->lastIndex_string()),
197 Object);
198
199 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
200 Object::ToLength(isolate, last_index_obj), Object);
201 const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
202 const uint64_t new_last_index =
203 AdvanceStringIndex(string, last_index, unicode);
204
205 return SetLastIndex(isolate, regexp, new_last_index);
206 }
207
208 } // namespace internal
209 } // namespace v8
210