1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-string-gen.h"
6
7 #include "src/base/strings.h"
8 #include "src/builtins/builtins-regexp-gen.h"
9 #include "src/builtins/builtins-utils-gen.h"
10 #include "src/builtins/builtins.h"
11 #include "src/codegen/code-factory.h"
12 #include "src/execution/protectors.h"
13 #include "src/heap/factory-inl.h"
14 #include "src/heap/heap-inl.h"
15 #include "src/logging/counters.h"
16 #include "src/objects/objects.h"
17 #include "src/objects/property-cell.h"
18
19 namespace v8 {
20 namespace internal {
21
DirectStringData(TNode<String> string,TNode<Word32T> string_instance_type)22 TNode<RawPtrT> StringBuiltinsAssembler::DirectStringData(
23 TNode<String> string, TNode<Word32T> string_instance_type) {
24 // Compute the effective offset of the first character.
25 TVARIABLE(RawPtrT, var_data);
26 Label if_sequential(this), if_external(this), if_join(this);
27 Branch(Word32Equal(Word32And(string_instance_type,
28 Int32Constant(kStringRepresentationMask)),
29 Int32Constant(kSeqStringTag)),
30 &if_sequential, &if_external);
31
32 BIND(&if_sequential);
33 {
34 var_data = RawPtrAdd(
35 ReinterpretCast<RawPtrT>(BitcastTaggedToWord(string)),
36 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
37 Goto(&if_join);
38 }
39
40 BIND(&if_external);
41 {
42 var_data = LoadExternalStringResourceDataPtr(CAST(string));
43 Goto(&if_join);
44 }
45
46 BIND(&if_join);
47 return var_data.value();
48 }
49
50 template <typename SubjectChar, typename PatternChar>
CallSearchStringRaw(const TNode<RawPtrT> subject_ptr,const TNode<IntPtrT> subject_length,const TNode<RawPtrT> search_ptr,const TNode<IntPtrT> search_length,const TNode<IntPtrT> start_position)51 TNode<IntPtrT> StringBuiltinsAssembler::CallSearchStringRaw(
52 const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
53 const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
54 const TNode<IntPtrT> start_position) {
55 const TNode<ExternalReference> function_addr = ExternalConstant(
56 ExternalReference::search_string_raw<SubjectChar, PatternChar>());
57 const TNode<ExternalReference> isolate_ptr =
58 ExternalConstant(ExternalReference::isolate_address(isolate()));
59
60 MachineType type_ptr = MachineType::Pointer();
61 MachineType type_intptr = MachineType::IntPtr();
62
63 const TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
64 function_addr, type_intptr, std::make_pair(type_ptr, isolate_ptr),
65 std::make_pair(type_ptr, subject_ptr),
66 std::make_pair(type_intptr, subject_length),
67 std::make_pair(type_ptr, search_ptr),
68 std::make_pair(type_intptr, search_length),
69 std::make_pair(type_intptr, start_position)));
70
71 return result;
72 }
// Searches a two-byte subject for a one-byte search string by forwarding all
// arguments to CallSearchStringRaw, instantiated with base::uc16 subject
// characters and uint8_t pattern characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const base::uc16;
  using PatternChar = const uint8_t;
  const TNode<IntPtrT> search_result =
      CallSearchStringRaw<SubjectChar, PatternChar>(
          subject_ptr, subject_length, search_ptr, search_length,
          start_position);
  return search_result;
}
// Searches a one-byte subject for a one-byte search string by forwarding all
// arguments to CallSearchStringRaw, instantiated with uint8_t for both the
// subject and the pattern characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const uint8_t;
  using PatternChar = const uint8_t;
  const TNode<IntPtrT> search_result =
      CallSearchStringRaw<SubjectChar, PatternChar>(
          subject_ptr, subject_length, search_ptr, search_length,
          start_position);
  return search_result;
}
// Searches a two-byte subject for a two-byte search string by forwarding all
// arguments to CallSearchStringRaw, instantiated with base::uc16 for both the
// subject and the pattern characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const base::uc16;
  using PatternChar = const base::uc16;
  const TNode<IntPtrT> search_result =
      CallSearchStringRaw<SubjectChar, PatternChar>(
          subject_ptr, subject_length, search_ptr, search_length,
          start_position);
  return search_result;
}
// Searches a one-byte subject for a two-byte search string by forwarding all
// arguments to CallSearchStringRaw, instantiated with uint8_t subject
// characters and base::uc16 pattern characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const uint8_t;
  using PatternChar = const base::uc16;
  const TNode<IntPtrT> search_result =
      CallSearchStringRaw<SubjectChar, PatternChar>(
          subject_ptr, subject_length, search_ptr, search_length,
          start_position);
  return search_result;
}
// Looks for the single byte stored at {search_ptr} inside a one-byte subject,
// beginning at {start_position}. The scan is delegated to libc memchr over
// the remaining {subject_length} - {start_position} bytes. Returns the index
// of the match relative to {subject_ptr}, or -1 when memchr yields a null
// pointer.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> start_position) {
  // memchr arguments: where to start, which byte to find, how far to look.
  const TNode<RawPtrT> scan_start = RawPtrAdd(subject_ptr, start_position);
  const TNode<IntPtrT> wanted_byte =
      ChangeInt32ToIntPtr(Load<Uint8T>(search_ptr));
  const TNode<UintPtrT> scan_size =
      Unsigned(IntPtrSub(subject_length, start_position));

  const TNode<ExternalReference> memchr_function =
      ExternalConstant(ExternalReference::libc_memchr_function());
  const TNode<RawPtrT> match_address = UncheckedCast<RawPtrT>(
      CallCFunction(memchr_function, MachineType::Pointer(),
                    std::make_pair(MachineType::Pointer(), scan_start),
                    std::make_pair(MachineType::IntPtr(), wanted_byte),
                    std::make_pair(MachineType::UintPtr(), scan_size)));

  // Translate the returned address back into an index into the subject.
  const TNode<BoolT> no_match = WordEqual(match_address, IntPtrConstant(0));
  return Select<IntPtrT>(
      no_match, [=] { return IntPtrConstant(-1); },
      [=] {
        const TNode<IntPtrT> distance = RawPtrSub(match_address, scan_start);
        return IntPtrAdd(distance, start_position);
      });
}
125
GenerateStringEqual(TNode<String> left,TNode<String> right)126 void StringBuiltinsAssembler::GenerateStringEqual(TNode<String> left,
127 TNode<String> right) {
128 TVARIABLE(String, var_left, left);
129 TVARIABLE(String, var_right, right);
130 Label if_equal(this), if_notequal(this), if_indirect(this, Label::kDeferred),
131 restart(this, {&var_left, &var_right});
132
133 TNode<IntPtrT> lhs_length = LoadStringLengthAsWord(left);
134 TNode<IntPtrT> rhs_length = LoadStringLengthAsWord(right);
135
136 // Strings with different lengths cannot be equal.
137 GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
138
139 Goto(&restart);
140 BIND(&restart);
141 TNode<String> lhs = var_left.value();
142 TNode<String> rhs = var_right.value();
143
144 TNode<Uint16T> lhs_instance_type = LoadInstanceType(lhs);
145 TNode<Uint16T> rhs_instance_type = LoadInstanceType(rhs);
146
147 StringEqual_Core(lhs, lhs_instance_type, rhs, rhs_instance_type, lhs_length,
148 &if_equal, &if_notequal, &if_indirect);
149
150 BIND(&if_indirect);
151 {
152 // Try to unwrap indirect strings, restart the above attempt on success.
153 MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
154 rhs_instance_type, &restart);
155
156 TailCallRuntime(Runtime::kStringEqual, NoContextConstant(), lhs, rhs);
157 }
158
159 BIND(&if_equal);
160 Return(TrueConstant());
161
162 BIND(&if_notequal);
163 Return(FalseConstant());
164 }
165
StringEqual_Core(TNode<String> lhs,TNode<Word32T> lhs_instance_type,TNode<String> rhs,TNode<Word32T> rhs_instance_type,TNode<IntPtrT> length,Label * if_equal,Label * if_not_equal,Label * if_indirect)166 void StringBuiltinsAssembler::StringEqual_Core(
167 TNode<String> lhs, TNode<Word32T> lhs_instance_type, TNode<String> rhs,
168 TNode<Word32T> rhs_instance_type, TNode<IntPtrT> length, Label* if_equal,
169 Label* if_not_equal, Label* if_indirect) {
170 CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(lhs), length));
171 CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(rhs), length));
172 // Fast check to see if {lhs} and {rhs} refer to the same String object.
173 GotoIf(TaggedEqual(lhs, rhs), if_equal);
174
175 // Combine the instance types into a single 16-bit value, so we can check
176 // both of them at once.
177 TNode<Word32T> both_instance_types = Word32Or(
178 lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
179
180 // Check if both {lhs} and {rhs} are internalized. Since we already know
181 // that they're not the same object, they're not equal in that case.
182 int const kBothInternalizedMask =
183 kIsNotInternalizedMask | (kIsNotInternalizedMask << 8);
184 int const kBothInternalizedTag = kInternalizedTag | (kInternalizedTag << 8);
185 GotoIf(Word32Equal(Word32And(both_instance_types,
186 Int32Constant(kBothInternalizedMask)),
187 Int32Constant(kBothInternalizedTag)),
188 if_not_equal);
189
190 // Check if both {lhs} and {rhs} are direct strings, and that in case of
191 // ExternalStrings the data pointer is cached.
192 STATIC_ASSERT(kUncachedExternalStringTag != 0);
193 STATIC_ASSERT(kIsIndirectStringTag != 0);
194 int const kBothDirectStringMask =
195 kIsIndirectStringMask | kUncachedExternalStringMask |
196 ((kIsIndirectStringMask | kUncachedExternalStringMask) << 8);
197 GotoIfNot(Word32Equal(Word32And(both_instance_types,
198 Int32Constant(kBothDirectStringMask)),
199 Int32Constant(0)),
200 if_indirect);
201
202 // Dispatch based on the {lhs} and {rhs} string encoding.
203 int const kBothStringEncodingMask =
204 kStringEncodingMask | (kStringEncodingMask << 8);
205 int const kOneOneByteStringTag = kOneByteStringTag | (kOneByteStringTag << 8);
206 int const kTwoTwoByteStringTag = kTwoByteStringTag | (kTwoByteStringTag << 8);
207 int const kOneTwoByteStringTag = kOneByteStringTag | (kTwoByteStringTag << 8);
208 Label if_oneonebytestring(this), if_twotwobytestring(this),
209 if_onetwobytestring(this), if_twoonebytestring(this);
210 TNode<Word32T> masked_instance_types =
211 Word32And(both_instance_types, Int32Constant(kBothStringEncodingMask));
212 GotoIf(
213 Word32Equal(masked_instance_types, Int32Constant(kOneOneByteStringTag)),
214 &if_oneonebytestring);
215 GotoIf(
216 Word32Equal(masked_instance_types, Int32Constant(kTwoTwoByteStringTag)),
217 &if_twotwobytestring);
218 Branch(
219 Word32Equal(masked_instance_types, Int32Constant(kOneTwoByteStringTag)),
220 &if_onetwobytestring, &if_twoonebytestring);
221
222 BIND(&if_oneonebytestring);
223 StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
224 rhs_instance_type, MachineType::Uint8(), length, if_equal,
225 if_not_equal);
226
227 BIND(&if_twotwobytestring);
228 StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
229 rhs_instance_type, MachineType::Uint16(), length, if_equal,
230 if_not_equal);
231
232 BIND(&if_onetwobytestring);
233 StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
234 rhs_instance_type, MachineType::Uint16(), length, if_equal,
235 if_not_equal);
236
237 BIND(&if_twoonebytestring);
238 StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
239 rhs_instance_type, MachineType::Uint8(), length, if_equal,
240 if_not_equal);
241 }
242
StringEqual_Loop(TNode<String> lhs,TNode<Word32T> lhs_instance_type,MachineType lhs_type,TNode<String> rhs,TNode<Word32T> rhs_instance_type,MachineType rhs_type,TNode<IntPtrT> length,Label * if_equal,Label * if_not_equal)243 void StringBuiltinsAssembler::StringEqual_Loop(
244 TNode<String> lhs, TNode<Word32T> lhs_instance_type, MachineType lhs_type,
245 TNode<String> rhs, TNode<Word32T> rhs_instance_type, MachineType rhs_type,
246 TNode<IntPtrT> length, Label* if_equal, Label* if_not_equal) {
247 CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(lhs), length));
248 CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(rhs), length));
249
250 // Compute the effective offset of the first character.
251 TNode<RawPtrT> lhs_data = DirectStringData(lhs, lhs_instance_type);
252 TNode<RawPtrT> rhs_data = DirectStringData(rhs, rhs_instance_type);
253
254 // Loop over the {lhs} and {rhs} strings to see if they are equal.
255 TVARIABLE(IntPtrT, var_offset, IntPtrConstant(0));
256 Label loop(this, &var_offset);
257 Goto(&loop);
258 BIND(&loop);
259 {
260 // If {offset} equals {end}, no difference was found, so the
261 // strings are equal.
262 GotoIf(WordEqual(var_offset.value(), length), if_equal);
263
264 // Load the next characters from {lhs} and {rhs}.
265 TNode<Word32T> lhs_value = UncheckedCast<Word32T>(
266 Load(lhs_type, lhs_data,
267 WordShl(var_offset.value(),
268 ElementSizeLog2Of(lhs_type.representation()))));
269 TNode<Word32T> rhs_value = UncheckedCast<Word32T>(
270 Load(rhs_type, rhs_data,
271 WordShl(var_offset.value(),
272 ElementSizeLog2Of(rhs_type.representation()))));
273
274 // Check if the characters match.
275 GotoIf(Word32NotEqual(lhs_value, rhs_value), if_not_equal);
276
277 // Advance to next character.
278 var_offset = IntPtrAdd(var_offset.value(), IntPtrConstant(1));
279 Goto(&loop);
280 }
281 }
282
StringFromSingleUTF16EncodedCodePoint(TNode<Int32T> codepoint)283 TNode<String> StringBuiltinsAssembler::StringFromSingleUTF16EncodedCodePoint(
284 TNode<Int32T> codepoint) {
285 TVARIABLE(String, var_result, EmptyStringConstant());
286
287 Label if_isword16(this), if_isword32(this), return_result(this);
288
289 Branch(Uint32LessThan(codepoint, Int32Constant(0x10000)), &if_isword16,
290 &if_isword32);
291
292 BIND(&if_isword16);
293 {
294 var_result = StringFromSingleCharCode(codepoint);
295 Goto(&return_result);
296 }
297
298 BIND(&if_isword32);
299 {
300 TNode<String> value = AllocateSeqTwoByteString(2);
301 StoreNoWriteBarrier(
302 MachineRepresentation::kWord32, value,
303 IntPtrConstant(SeqTwoByteString::kHeaderSize - kHeapObjectTag),
304 codepoint);
305 var_result = value;
306 Goto(&return_result);
307 }
308
309 BIND(&return_result);
310 return var_result.value();
311 }
312
AllocateConsString(TNode<Uint32T> length,TNode<String> left,TNode<String> right)313 TNode<String> StringBuiltinsAssembler::AllocateConsString(TNode<Uint32T> length,
314 TNode<String> left,
315 TNode<String> right) {
316 // Added string can be a cons string.
317 Comment("Allocating ConsString");
318 TNode<Int32T> left_instance_type = LoadInstanceType(left);
319 TNode<Int32T> right_instance_type = LoadInstanceType(right);
320
321 // Determine the resulting ConsString map to use depending on whether
322 // any of {left} or {right} has two byte encoding.
323 STATIC_ASSERT(kOneByteStringTag != 0);
324 STATIC_ASSERT(kTwoByteStringTag == 0);
325 TNode<Int32T> combined_instance_type =
326 Word32And(left_instance_type, right_instance_type);
327 TNode<Map> result_map = CAST(Select<Object>(
328 IsSetWord32(combined_instance_type, kStringEncodingMask),
329 [=] { return ConsOneByteStringMapConstant(); },
330 [=] { return ConsStringMapConstant(); }));
331 TNode<HeapObject> result = AllocateInNewSpace(ConsString::kSize);
332 StoreMapNoWriteBarrier(result, result_map);
333 StoreObjectFieldNoWriteBarrier(result, ConsString::kLengthOffset, length);
334 StoreObjectFieldNoWriteBarrier(result, ConsString::kRawHashFieldOffset,
335 Int32Constant(String::kEmptyHashField));
336 StoreObjectFieldNoWriteBarrier(result, ConsString::kFirstOffset, left);
337 StoreObjectFieldNoWriteBarrier(result, ConsString::kSecondOffset, right);
338 return CAST(result);
339 }
340
StringAdd(TNode<ContextOrEmptyContext> context,TNode<String> left,TNode<String> right)341 TNode<String> StringBuiltinsAssembler::StringAdd(
342 TNode<ContextOrEmptyContext> context, TNode<String> left,
343 TNode<String> right) {
344 CSA_DCHECK(this, IsZeroOrContext(context));
345
346 TVARIABLE(String, result);
347 Label check_right(this), runtime(this, Label::kDeferred), cons(this),
348 done(this, &result), done_native(this, &result);
349 Counters* counters = isolate()->counters();
350
351 TNode<Uint32T> left_length = LoadStringLengthAsWord32(left);
352 GotoIfNot(Word32Equal(left_length, Uint32Constant(0)), &check_right);
353 result = right;
354 Goto(&done_native);
355
356 BIND(&check_right);
357 TNode<Uint32T> right_length = LoadStringLengthAsWord32(right);
358 GotoIfNot(Word32Equal(right_length, Uint32Constant(0)), &cons);
359 result = left;
360 Goto(&done_native);
361
362 BIND(&cons);
363 {
364 TNode<Uint32T> new_length = Uint32Add(left_length, right_length);
365
366 // If new length is greater than String::kMaxLength, goto runtime to
367 // throw. Note: we also need to invalidate the string length protector, so
368 // can't just throw here directly.
369 GotoIf(Uint32GreaterThan(new_length, Uint32Constant(String::kMaxLength)),
370 &runtime);
371
372 TVARIABLE(String, var_left, left);
373 TVARIABLE(String, var_right, right);
374 Label non_cons(this, {&var_left, &var_right});
375 Label slow(this, Label::kDeferred);
376 GotoIf(Uint32LessThan(new_length, Uint32Constant(ConsString::kMinLength)),
377 &non_cons);
378
379 result =
380 AllocateConsString(new_length, var_left.value(), var_right.value());
381 Goto(&done_native);
382
383 BIND(&non_cons);
384
385 Comment("Full string concatenate");
386 TNode<Int32T> left_instance_type = LoadInstanceType(var_left.value());
387 TNode<Int32T> right_instance_type = LoadInstanceType(var_right.value());
388 // Compute intersection and difference of instance types.
389
390 TNode<Int32T> ored_instance_types =
391 Word32Or(left_instance_type, right_instance_type);
392 TNode<Word32T> xored_instance_types =
393 Word32Xor(left_instance_type, right_instance_type);
394
395 // Check if both strings have the same encoding and both are sequential.
396 GotoIf(IsSetWord32(xored_instance_types, kStringEncodingMask), &runtime);
397 GotoIf(IsSetWord32(ored_instance_types, kStringRepresentationMask), &slow);
398
399 TNode<IntPtrT> word_left_length = Signed(ChangeUint32ToWord(left_length));
400 TNode<IntPtrT> word_right_length = Signed(ChangeUint32ToWord(right_length));
401
402 Label two_byte(this);
403 GotoIf(Word32Equal(Word32And(ored_instance_types,
404 Int32Constant(kStringEncodingMask)),
405 Int32Constant(kTwoByteStringTag)),
406 &two_byte);
407 // One-byte sequential string case
408 result = AllocateSeqOneByteString(new_length);
409 CopyStringCharacters(var_left.value(), result.value(), IntPtrConstant(0),
410 IntPtrConstant(0), word_left_length,
411 String::ONE_BYTE_ENCODING, String::ONE_BYTE_ENCODING);
412 CopyStringCharacters(var_right.value(), result.value(), IntPtrConstant(0),
413 word_left_length, word_right_length,
414 String::ONE_BYTE_ENCODING, String::ONE_BYTE_ENCODING);
415 Goto(&done_native);
416
417 BIND(&two_byte);
418 {
419 // Two-byte sequential string case
420 result = AllocateSeqTwoByteString(new_length);
421 CopyStringCharacters(var_left.value(), result.value(), IntPtrConstant(0),
422 IntPtrConstant(0), word_left_length,
423 String::TWO_BYTE_ENCODING,
424 String::TWO_BYTE_ENCODING);
425 CopyStringCharacters(var_right.value(), result.value(), IntPtrConstant(0),
426 word_left_length, word_right_length,
427 String::TWO_BYTE_ENCODING,
428 String::TWO_BYTE_ENCODING);
429 Goto(&done_native);
430 }
431
432 BIND(&slow);
433 {
434 // Try to unwrap indirect strings, restart the above attempt on success.
435 MaybeDerefIndirectStrings(&var_left, left_instance_type, &var_right,
436 right_instance_type, &non_cons);
437 Goto(&runtime);
438 }
439 }
440 BIND(&runtime);
441 {
442 result = CAST(CallRuntime(Runtime::kStringAdd, context, left, right));
443 Goto(&done);
444 }
445
446 BIND(&done_native);
447 {
448 IncrementCounter(counters->string_add_native(), 1);
449 Goto(&done);
450 }
451
452 BIND(&done);
453 return result.value();
454 }
455
BranchIfCanDerefIndirectString(TNode<String> string,TNode<Int32T> instance_type,Label * can_deref,Label * cannot_deref)456 void StringBuiltinsAssembler::BranchIfCanDerefIndirectString(
457 TNode<String> string, TNode<Int32T> instance_type, Label* can_deref,
458 Label* cannot_deref) {
459 TNode<Int32T> representation =
460 Word32And(instance_type, Int32Constant(kStringRepresentationMask));
461 GotoIf(Word32Equal(representation, Int32Constant(kThinStringTag)), can_deref);
462 GotoIf(Word32NotEqual(representation, Int32Constant(kConsStringTag)),
463 cannot_deref);
464 // Cons string.
465 TNode<String> rhs =
466 LoadObjectField<String>(string, ConsString::kSecondOffset);
467 GotoIf(IsEmptyString(rhs), can_deref);
468 Goto(cannot_deref);
469 }
470
DerefIndirectString(TVariable<String> * var_string,TNode<Int32T> instance_type)471 void StringBuiltinsAssembler::DerefIndirectString(TVariable<String>* var_string,
472 TNode<Int32T> instance_type) {
473 #ifdef DEBUG
474 Label can_deref(this), cannot_deref(this);
475 BranchIfCanDerefIndirectString(var_string->value(), instance_type, &can_deref,
476 &cannot_deref);
477 BIND(&cannot_deref);
478 DebugBreak(); // Should be able to dereference string.
479 Goto(&can_deref);
480 BIND(&can_deref);
481 #endif // DEBUG
482
483 STATIC_ASSERT(static_cast<int>(ThinString::kActualOffset) ==
484 static_cast<int>(ConsString::kFirstOffset));
485 *var_string =
486 LoadObjectField<String>(var_string->value(), ThinString::kActualOffset);
487 }
488
MaybeDerefIndirectString(TVariable<String> * var_string,TNode<Int32T> instance_type,Label * did_deref,Label * cannot_deref)489 void StringBuiltinsAssembler::MaybeDerefIndirectString(
490 TVariable<String>* var_string, TNode<Int32T> instance_type,
491 Label* did_deref, Label* cannot_deref) {
492 Label deref(this);
493 BranchIfCanDerefIndirectString(var_string->value(), instance_type, &deref,
494 cannot_deref);
495
496 BIND(&deref);
497 {
498 DerefIndirectString(var_string, instance_type);
499 Goto(did_deref);
500 }
501 }
502
MaybeDerefIndirectStrings(TVariable<String> * var_left,TNode<Int32T> left_instance_type,TVariable<String> * var_right,TNode<Int32T> right_instance_type,Label * did_something)503 void StringBuiltinsAssembler::MaybeDerefIndirectStrings(
504 TVariable<String>* var_left, TNode<Int32T> left_instance_type,
505 TVariable<String>* var_right, TNode<Int32T> right_instance_type,
506 Label* did_something) {
507 Label did_nothing_left(this), did_something_left(this),
508 didnt_do_anything(this);
509 MaybeDerefIndirectString(var_left, left_instance_type, &did_something_left,
510 &did_nothing_left);
511
512 BIND(&did_something_left);
513 {
514 MaybeDerefIndirectString(var_right, right_instance_type, did_something,
515 did_something);
516 }
517
518 BIND(&did_nothing_left);
519 {
520 MaybeDerefIndirectString(var_right, right_instance_type, did_something,
521 &didnt_do_anything);
522 }
523
524 BIND(&didnt_do_anything);
525 // Fall through if neither string was an indirect string.
526 }
527
DerefIndirectString(TNode<String> string,TNode<Int32T> instance_type,Label * cannot_deref)528 TNode<String> StringBuiltinsAssembler::DerefIndirectString(
529 TNode<String> string, TNode<Int32T> instance_type, Label* cannot_deref) {
530 Label deref(this);
531 BranchIfCanDerefIndirectString(string, instance_type, &deref, cannot_deref);
532 BIND(&deref);
533 STATIC_ASSERT(static_cast<int>(ThinString::kActualOffset) ==
534 static_cast<int>(ConsString::kFirstOffset));
535 return LoadObjectField<String>(string, ThinString::kActualOffset);
536 }
537
// Builtin wrapper around StringAdd: reads the two string operands and the
// context (which may be the empty context) from the descriptor and returns
// their concatenation.
TF_BUILTIN(StringAdd_CheckNone, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  const TNode<ContextOrEmptyContext> maybe_context =
      UncheckedParameter<ContextOrEmptyContext>(Descriptor::kContext);
  CSA_DCHECK(this, IsZeroOrContext(maybe_context));
  const TNode<String> sum = StringAdd(maybe_context, lhs, rhs);
  Return(sum);
}
546
// Builtin wrapper around the SubString helper: untags the Smi bounds {from}
// and {to} and returns the corresponding substring of {string}.
TF_BUILTIN(SubString, StringBuiltinsAssembler) {
  const TNode<String> subject = Parameter<String>(Descriptor::kString);
  const TNode<Smi> from_smi = Parameter<Smi>(Descriptor::kFrom);
  const TNode<Smi> to_smi = Parameter<Smi>(Descriptor::kTo);
  Return(SubString(subject, SmiUntag(from_smi), SmiUntag(to_smi)));
}
553
GenerateStringRelationalComparison(TNode<String> left,TNode<String> right,Operation op)554 void StringBuiltinsAssembler::GenerateStringRelationalComparison(
555 TNode<String> left, TNode<String> right, Operation op) {
556 TVARIABLE(String, var_left, left);
557 TVARIABLE(String, var_right, right);
558
559 Label if_less(this), if_equal(this), if_greater(this);
560 Label restart(this, {&var_left, &var_right});
561 Goto(&restart);
562 BIND(&restart);
563
564 TNode<String> lhs = var_left.value();
565 TNode<String> rhs = var_right.value();
566 // Fast check to see if {lhs} and {rhs} refer to the same String object.
567 GotoIf(TaggedEqual(lhs, rhs), &if_equal);
568
569 // Load instance types of {lhs} and {rhs}.
570 TNode<Uint16T> lhs_instance_type = LoadInstanceType(lhs);
571 TNode<Uint16T> rhs_instance_type = LoadInstanceType(rhs);
572
573 // Combine the instance types into a single 16-bit value, so we can check
574 // both of them at once.
575 TNode<Int32T> both_instance_types = Word32Or(
576 lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
577
578 // Check that both {lhs} and {rhs} are flat one-byte strings.
579 int const kBothSeqOneByteStringMask =
580 kStringEncodingMask | kStringRepresentationMask |
581 ((kStringEncodingMask | kStringRepresentationMask) << 8);
582 int const kBothSeqOneByteStringTag =
583 kOneByteStringTag | kSeqStringTag |
584 ((kOneByteStringTag | kSeqStringTag) << 8);
585 Label if_bothonebyteseqstrings(this), if_notbothonebyteseqstrings(this);
586 Branch(Word32Equal(Word32And(both_instance_types,
587 Int32Constant(kBothSeqOneByteStringMask)),
588 Int32Constant(kBothSeqOneByteStringTag)),
589 &if_bothonebyteseqstrings, &if_notbothonebyteseqstrings);
590
591 BIND(&if_bothonebyteseqstrings);
592 {
593 // Load the length of {lhs} and {rhs}.
594 TNode<IntPtrT> lhs_length = LoadStringLengthAsWord(lhs);
595 TNode<IntPtrT> rhs_length = LoadStringLengthAsWord(rhs);
596
597 // Determine the minimum length.
598 TNode<IntPtrT> length = IntPtrMin(lhs_length, rhs_length);
599
600 // Compute the effective offset of the first character.
601 TNode<IntPtrT> begin =
602 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag);
603
604 // Compute the first offset after the string from the length.
605 TNode<IntPtrT> end = IntPtrAdd(begin, length);
606
607 // Loop over the {lhs} and {rhs} strings to see if they are equal.
608 TVARIABLE(IntPtrT, var_offset, begin);
609 Label loop(this, &var_offset);
610 Goto(&loop);
611 BIND(&loop);
612 {
613 // Check if {offset} equals {end}.
614 Label if_done(this), if_notdone(this);
615 Branch(WordEqual(var_offset.value(), end), &if_done, &if_notdone);
616
617 BIND(&if_notdone);
618 {
619 // Load the next characters from {lhs} and {rhs}.
620 TNode<Uint8T> lhs_value = Load<Uint8T>(lhs, var_offset.value());
621 TNode<Uint8T> rhs_value = Load<Uint8T>(rhs, var_offset.value());
622
623 // Check if the characters match.
624 Label if_valueissame(this), if_valueisnotsame(this);
625 Branch(Word32Equal(lhs_value, rhs_value), &if_valueissame,
626 &if_valueisnotsame);
627
628 BIND(&if_valueissame);
629 {
630 // Advance to next character.
631 var_offset = IntPtrAdd(var_offset.value(), IntPtrConstant(1));
632 }
633 Goto(&loop);
634
635 BIND(&if_valueisnotsame);
636 Branch(Uint32LessThan(lhs_value, rhs_value), &if_less, &if_greater);
637 }
638
639 BIND(&if_done);
640 {
641 // All characters up to the min length are equal, decide based on
642 // string length.
643 GotoIf(IntPtrEqual(lhs_length, rhs_length), &if_equal);
644 Branch(IntPtrLessThan(lhs_length, rhs_length), &if_less, &if_greater);
645 }
646 }
647 }
648
649 BIND(&if_notbothonebyteseqstrings);
650 {
651 // Try to unwrap indirect strings, restart the above attempt on success.
652 MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
653 rhs_instance_type, &restart);
654 // TODO(bmeurer): Add support for two byte string relational comparisons.
655 switch (op) {
656 case Operation::kLessThan:
657 TailCallRuntime(Runtime::kStringLessThan, NoContextConstant(), lhs,
658 rhs);
659 break;
660 case Operation::kLessThanOrEqual:
661 TailCallRuntime(Runtime::kStringLessThanOrEqual, NoContextConstant(),
662 lhs, rhs);
663 break;
664 case Operation::kGreaterThan:
665 TailCallRuntime(Runtime::kStringGreaterThan, NoContextConstant(), lhs,
666 rhs);
667 break;
668 case Operation::kGreaterThanOrEqual:
669 TailCallRuntime(Runtime::kStringGreaterThanOrEqual, NoContextConstant(),
670 lhs, rhs);
671 break;
672 default:
673 UNREACHABLE();
674 }
675 }
676
677 BIND(&if_less);
678 switch (op) {
679 case Operation::kLessThan:
680 case Operation::kLessThanOrEqual:
681 Return(TrueConstant());
682 break;
683
684 case Operation::kGreaterThan:
685 case Operation::kGreaterThanOrEqual:
686 Return(FalseConstant());
687 break;
688 default:
689 UNREACHABLE();
690 }
691
692 BIND(&if_equal);
693 switch (op) {
694 case Operation::kLessThan:
695 case Operation::kGreaterThan:
696 Return(FalseConstant());
697 break;
698
699 case Operation::kLessThanOrEqual:
700 case Operation::kGreaterThanOrEqual:
701 Return(TrueConstant());
702 break;
703 default:
704 UNREACHABLE();
705 }
706
707 BIND(&if_greater);
708 switch (op) {
709 case Operation::kLessThan:
710 case Operation::kLessThanOrEqual:
711 Return(FalseConstant());
712 break;
713
714 case Operation::kGreaterThan:
715 case Operation::kGreaterThanOrEqual:
716 Return(TrueConstant());
717 break;
718 default:
719 UNREACHABLE();
720 }
721 }
722
// Builtin entry point for string equality: hands both string operands to
// GenerateStringEqual, which emits the comparison and the return.
TF_BUILTIN(StringEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringEqual(lhs, rhs);
}
728
// Builtin entry point for the string "less than" comparison: delegates to
// GenerateStringRelationalComparison with Operation::kLessThan.
TF_BUILTIN(StringLessThan, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kLessThan);
}
734
// Builtin entry point for the string "less than or equal" comparison:
// delegates to GenerateStringRelationalComparison with
// Operation::kLessThanOrEqual.
TF_BUILTIN(StringLessThanOrEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kLessThanOrEqual);
}
740
// Builtin entry point for the string "greater than" comparison: delegates to
// GenerateStringRelationalComparison with Operation::kGreaterThan.
TF_BUILTIN(StringGreaterThan, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kGreaterThan);
}
746
// Builtin entry point for the string "greater than or equal" comparison:
// delegates to GenerateStringRelationalComparison with
// Operation::kGreaterThanOrEqual.
TF_BUILTIN(StringGreaterThanOrEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs,
                                     Operation::kGreaterThanOrEqual);
}
753
// Builtin that returns a string made from the code point found at
// {position} in {receiver}. The code point is loaded UTF-16 encoded via
// LoadSurrogatePairAt and converted with
// StringFromSingleUTF16EncodedCodePoint.
TF_BUILTIN(StringFromCodePointAt, StringBuiltinsAssembler) {
  const TNode<String> subject = Parameter<String>(Descriptor::kReceiver);
  const TNode<IntPtrT> index =
      UncheckedParameter<IntPtrT>(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  const TNode<IntPtrT> subject_length = LoadStringLengthAsWord(subject);
  // Read the UTF-16 encoded code point located at {index}.
  const TNode<Int32T> encoded_code_point = LoadSurrogatePairAt(
      subject, subject_length, index, UnicodeEncoding::UTF16);
  // Wrap the encoded code point in a string and return it.
  Return(StringFromSingleUTF16EncodedCodePoint(encoded_code_point));
}
767
768 // -----------------------------------------------------------------------------
769 // ES6 section 21.1 String Objects
770
// ES6 #sec-string.fromcharcode
//
// Builds a string whose code units are the arguments, each truncated to 16
// bits (masked with String::kMaxUtf16CodeUnit).
//  * Exactly one argument: the result comes from StringFromSingleCharCode.
//  * Otherwise: a SeqOneByteString of length argc is filled optimistically.
//    On the first code unit greater than String::kMaxOneByteCharCode, a
//    SeqTwoByteString of the same length is allocated, the characters written
//    so far are copied over, and the remaining arguments are written as
//    16-bit code units.
TF_BUILTIN(StringFromCharCode, StringBuiltinsAssembler) {
  // TODO(ishell): use constants from Descriptor once the JSFunction linkage
  // arguments are reordered.
  auto argc = UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount);
  auto context = Parameter<Context>(Descriptor::kContext);

  CodeStubArguments arguments(this, argc);
  // Number of arguments excluding the receiver; used below as the length of
  // the result string in the multi-argument case.
  TNode<Uint32T> unsigned_argc =
      Unsigned(TruncateIntPtrToInt32(arguments.GetLengthWithoutReceiver()));
  // Check if we have exactly one argument (plus the implicit receiver), i.e.
  // if the parent frame is not an arguments adaptor frame.
  Label if_oneargument(this), if_notoneargument(this);
  Branch(IntPtrEqual(arguments.GetLengthWithoutReceiver(), IntPtrConstant(1)),
         &if_oneargument, &if_notoneargument);

  BIND(&if_oneargument);
  {
    // Single argument case, perform fast single character string cache lookup
    // for one-byte code units, or fall back to creating a single character
    // string on the fly otherwise.
    TNode<Object> code = arguments.AtIndex(0);
    TNode<Word32T> code32 = TruncateTaggedToWord32(context, code);
    TNode<Int32T> code16 =
        Signed(Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit)));
    TNode<String> result = StringFromSingleCharCode(code16);
    arguments.PopAndReturn(result);
  }

  // Declared outside the loop lambda below so that the code unit which
  // triggered the jump to {two_byte} can still be read after that label.
  TNode<Word32T> code16;
  BIND(&if_notoneargument);
  {
    Label two_byte(this);
    // Assume that the resulting string contains only one-byte characters.
    TNode<String> one_byte_result = AllocateSeqOneByteString(unsigned_argc);

    // Index of the next character to write; shared by the one-byte loop, the
    // copy into the two-byte string, and the two-byte loop.
    TVARIABLE(IntPtrT, var_max_index, IntPtrConstant(0));

    // Iterate over the incoming arguments, converting them to 8-bit character
    // codes. Stop if any of the conversions generates a code that doesn't fit
    // in 8 bits.
    CodeStubAssembler::VariableList vars({&var_max_index}, zone());
    arguments.ForEach(vars, [&](TNode<Object> arg) {
      TNode<Word32T> code32 = TruncateTaggedToWord32(context, arg);
      code16 = Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));

      GotoIf(
          Int32GreaterThan(code16, Int32Constant(String::kMaxOneByteCharCode)),
          &two_byte);

      // The {code16} fits into the SeqOneByteString {one_byte_result}.
      TNode<IntPtrT> offset = ElementOffsetFromIndex(
          var_max_index.value(), UINT8_ELEMENTS,
          SeqOneByteString::kHeaderSize - kHeapObjectTag);
      StoreNoWriteBarrier(MachineRepresentation::kWord8, one_byte_result,
                          offset, code16);
      var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));
    });
    // Every argument fit into 8 bits: the one-byte string is the result.
    arguments.PopAndReturn(one_byte_result);

    BIND(&two_byte);

    // At least one of the characters in the string requires a 16-bit
    // representation.  Allocate a SeqTwoByteString to hold the resulting
    // string.
    TNode<String> two_byte_result = AllocateSeqTwoByteString(unsigned_argc);

    // Copy the characters that have already been put in the 8-bit string into
    // their corresponding positions in the new 16-bit string.
    TNode<IntPtrT> zero = IntPtrConstant(0);
    CopyStringCharacters(one_byte_result, two_byte_result, zero, zero,
                         var_max_index.value(), String::ONE_BYTE_ENCODING,
                         String::TWO_BYTE_ENCODING);

    // Write the character that caused the 8-bit to 16-bit fault.
    TNode<IntPtrT> max_index_offset =
        ElementOffsetFromIndex(var_max_index.value(), UINT16_ELEMENTS,
                               SeqTwoByteString::kHeaderSize - kHeapObjectTag);
    StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
                        max_index_offset, code16);
    var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));

    // Resume copying the passed-in arguments from the same place where the
    // 8-bit copy stopped, but this time copying over all of the characters
    // using a 16-bit representation.
    arguments.ForEach(
        vars,
        [&](TNode<Object> arg) {
          TNode<Word32T> code32 = TruncateTaggedToWord32(context, arg);
          // Intentionally shadows the outer {code16}: this one is local to the
          // two-byte loop body.
          TNode<Word32T> code16 =
              Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));

          TNode<IntPtrT> offset = ElementOffsetFromIndex(
              var_max_index.value(), UINT16_ELEMENTS,
              SeqTwoByteString::kHeaderSize - kHeapObjectTag);
          StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
                              offset, code16);
          var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));
        },
        var_max_index.value());

    arguments.PopAndReturn(two_byte_result);
  }
}
875
// Emits the "redirect to {object[symbol]} if present" prologue shared by the
// String.prototype methods in this file.
//
//  * {object}: the value whose {symbol} property is looked up (e.g. the
//    search value / separator / regexp argument).
//  * {maybe_string}: the receiver; the RegExp fast path is only taken when
//    this is already a String.
//  * {additional_property_to_check}: forwarded to BranchIfFastRegExp.
//  * {regexp_call}: invoked to emit code for the fast RegExp case.
//  * {generic_call}: invoked with the looked-up property to emit the generic
//    call, when that property is neither undefined nor null.
//
// Code emitted after this function runs when {object} is null/undefined or
// when {object[symbol]} is undefined/null.
//
// NOTE(review): {stub_call} is bound directly before {slow_lookup} with no
// Goto in between, and {generic_call} is followed directly by BIND(&out). The
// callbacks passed in this file all end in Return/PopAndReturn; new callers
// presumably must do the same -- confirm.
void StringBuiltinsAssembler::MaybeCallFunctionAtSymbol(
    const TNode<Context> context, const TNode<Object> object,
    const TNode<Object> maybe_string, Handle<Symbol> symbol,
    DescriptorIndexNameValue additional_property_to_check,
    const NodeFunction0& regexp_call, const NodeFunction1& generic_call) {
  Label out(this);
  Label get_property_lookup(this);

  // Smis have to go through the GetProperty lookup in case Number.prototype or
  // Object.prototype was modified.
  GotoIf(TaggedIsSmi(object), &get_property_lookup);

  // Take the fast path for RegExps.
  // There's two conditions: {object} needs to be a fast regexp, and
  // {maybe_string} must be a string (we can't call ToString on the fast path
  // since it may mutate {object}).
  {
    Label stub_call(this), slow_lookup(this);

    TNode<HeapObject> heap_object = CAST(object);

    GotoIf(TaggedIsSmi(maybe_string), &slow_lookup);
    GotoIfNot(IsString(CAST(maybe_string)), &slow_lookup);

    // Note we don't run a full (= permissive) check here, because passing the
    // check implies calling the fast variants of target builtins, which assume
    // we've already made their appropriate fast path checks. This is not the
    // case though; e.g.: some of the target builtins access flag getters.
    // TODO(jgruber): Handle slow flag accesses on the fast path and make this
    // permissive.
    RegExpBuiltinsAssembler regexp_asm(state());
    regexp_asm.BranchIfFastRegExp(
        context, heap_object, LoadMap(heap_object),
        PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
        additional_property_to_check, &stub_call, &slow_lookup);

    BIND(&stub_call);
    // TODO(jgruber): Add a no-JS scope once it exists.
    regexp_call();

    BIND(&slow_lookup);
    // Special case null and undefined to skip the property lookup.
    Branch(IsNullOrUndefined(heap_object), &out, &get_property_lookup);
  }

  // Fall back to a slow lookup of {heap_object[symbol]}.
  //
  // The spec uses GetMethod({heap_object}, {symbol}), which has a few quirks:
  // * null values are turned into undefined, and
  // * an exception is thrown if the value is not undefined, null, or callable.
  // We handle the former by jumping to {out} for null values as well, while
  // the latter is already handled by the Call({maybe_func}) operation.

  BIND(&get_property_lookup);
  const TNode<Object> maybe_func = GetProperty(context, object, symbol);
  GotoIf(IsUndefined(maybe_func), &out);
  GotoIf(IsNull(maybe_func), &out);

  // Attempt to call the function.
  generic_call(maybe_func);

  BIND(&out);
}
939
// Returns, as a Smi, the result of the StringIndexOf builtin when searching
// {string} for the single-character string "$" starting at position 0.
const TNode<Smi> StringBuiltinsAssembler::IndexOfDollarChar(
    const TNode<Context> context, const TNode<String> string) {
  // The one-character "$" string, embedded as a heap constant.
  const TNode<String> needle = HeapConstant(
      isolate()->factory()->LookupSingleCharacterStringFromCode('$'));

  return CAST(CallBuiltin(Builtin::kStringIndexOf, context, string, needle,
                          SmiConstant(0)));
}
948
GetSubstitution(TNode<Context> context,TNode<String> subject_string,TNode<Smi> match_start_index,TNode<Smi> match_end_index,TNode<String> replace_string)949 TNode<String> StringBuiltinsAssembler::GetSubstitution(
950 TNode<Context> context, TNode<String> subject_string,
951 TNode<Smi> match_start_index, TNode<Smi> match_end_index,
952 TNode<String> replace_string) {
953 CSA_DCHECK(this, TaggedIsPositiveSmi(match_start_index));
954 CSA_DCHECK(this, TaggedIsPositiveSmi(match_end_index));
955
956 TVARIABLE(String, var_result, replace_string);
957 Label runtime(this), out(this);
958
959 // In this primitive implementation we simply look for the next '$' char in
960 // {replace_string}. If it doesn't exist, we can simply return
961 // {replace_string} itself. If it does, then we delegate to
962 // String::GetSubstitution, passing in the index of the first '$' to avoid
963 // repeated scanning work.
964 // TODO(jgruber): Possibly extend this in the future to handle more complex
965 // cases without runtime calls.
966
967 const TNode<Smi> dollar_index = IndexOfDollarChar(context, replace_string);
968 Branch(SmiIsNegative(dollar_index), &out, &runtime);
969
970 BIND(&runtime);
971 {
972 CSA_DCHECK(this, TaggedIsPositiveSmi(dollar_index));
973
974 const TNode<Object> matched =
975 CallBuiltin(Builtin::kStringSubstring, context, subject_string,
976 SmiUntag(match_start_index), SmiUntag(match_end_index));
977 const TNode<String> replacement_string = CAST(
978 CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
979 match_start_index, replace_string, dollar_index));
980 var_result = replacement_string;
981
982 Goto(&out);
983 }
984
985 BIND(&out);
986 return var_result.value();
987 }
988
// ES6 #sec-string.prototype.replace
//
// Outline of the emitted code:
//  1. RequireObjectCoercible on the receiver.
//  2. If {search} is a fast RegExp (and the receiver is a String), delegate to
//     the RegExpReplace builtin; otherwise, if {search[@@replace]} is neither
//     undefined nor null, call it.
//  3. Convert receiver and {search} to strings.
//  4. Fast path for a one-character search string in a long cons-string
//     receiver with a String {replace}, unless IndexOfDollarChar yields a
//     positive Smi: Runtime::kStringReplaceOneCharWithString.
//  5. Otherwise find the first occurrence via StringIndexOf. If there is
//     none, return the subject (after running ToString(replace) where that
//     could be observable).
//  6. Result = prefix + replacement + suffix, where the replacement is either
//     the stringified result of calling {replace} or GetSubstitution(...).
// Only the first occurrence is replaced: StringIndexOf is called once.
TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
  Label out(this);

  auto receiver = Parameter<Object>(Descriptor::kReceiver);
  const auto search = Parameter<Object>(Descriptor::kSearch);
  const auto replace = Parameter<Object>(Descriptor::kReplace);
  auto context = Parameter<Context>(Descriptor::kContext);

  const TNode<Smi> smi_zero = SmiConstant(0);

  RequireObjectCoercible(context, receiver, "String.prototype.replace");

  // Redirect to replacer method if {search[@@replace]} is not undefined.

  MaybeCallFunctionAtSymbol(
      context, search, receiver, isolate()->factory()->replace_symbol(),
      DescriptorIndexNameValue{JSRegExp::kSymbolReplaceFunctionDescriptorIndex,
                               RootIndex::kreplace_symbol,
                               Context::REGEXP_REPLACE_FUNCTION_INDEX},
      [=]() {
        Return(CallBuiltin(Builtin::kRegExpReplace, context, search, receiver,
                           replace));
      },
      [=](TNode<Object> fn) {
        Return(Call(context, fn, search, receiver, replace));
      });

  // Convert {receiver} and {search} to strings.

  const TNode<String> subject_string = ToString_Inline(context, receiver);
  const TNode<String> search_string = ToString_Inline(context, search);

  const TNode<IntPtrT> subject_length = LoadStringLengthAsWord(subject_string);
  const TNode<IntPtrT> search_length = LoadStringLengthAsWord(search_string);

  // Fast-path single-char {search}, long cons {receiver}, and simple string
  // {replace}.
  {
    Label next(this);

    GotoIfNot(WordEqual(search_length, IntPtrConstant(1)), &next);
    // "Long" here means more than 0xFF characters.
    GotoIfNot(IntPtrGreaterThan(subject_length, IntPtrConstant(0xFF)), &next);
    GotoIf(TaggedIsSmi(replace), &next);
    GotoIfNot(IsString(CAST(replace)), &next);

    TNode<String> replace_string = CAST(replace);
    const TNode<Uint16T> subject_instance_type =
        LoadInstanceType(subject_string);
    GotoIfNot(IsConsStringInstanceType(subject_instance_type), &next);

    // Leave the fast path when the '$' lookup in {replace_string} returns a
    // positive Smi.
    GotoIf(TaggedIsPositiveSmi(IndexOfDollarChar(context, replace_string)),
           &next);

    // Searching by traversing a cons string tree and replace with cons of
    // slices works only when the replaced string is a single character, being
    // replaced by a simple string and only pays off for long strings.
    // TODO(jgruber): Reevaluate if this is still beneficial.
    // TODO(jgruber): TailCallRuntime when it correctly handles adapter frames.
    Return(CallRuntime(Runtime::kStringReplaceOneCharWithString, context,
                       subject_string, search_string, replace_string));

    BIND(&next);
  }

  // TODO(jgruber): Extend StringIndexOf to handle two-byte strings and
  // longer substrings - we can handle up to 8 chars (one-byte) / 4 chars
  // (2-byte).

  const TNode<Smi> match_start_index =
      CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string,
                       search_string, smi_zero));

  // Early exit if no match found.
  {
    Label next(this), return_subject(this);

    GotoIfNot(SmiIsNegative(match_start_index), &next);

    // The spec requires to perform ToString(replace) if the {replace} is not
    // callable even if we are going to exit here.
    // Since ToString() being applied to Smi does not have side effects for
    // numbers we can skip it.
    GotoIf(TaggedIsSmi(replace), &return_subject);
    GotoIf(IsCallableMap(LoadMap(CAST(replace))), &return_subject);

    // TODO(jgruber): Could introduce ToStringSideeffectsStub which only
    // performs observable parts of ToString.
    // The result is discarded; the call is made only for its side effects.
    ToString_Inline(context, replace);
    Goto(&return_subject);

    BIND(&return_subject);
    Return(subject_string);

    BIND(&next);
  }

  const TNode<Smi> match_end_index =
      SmiAdd(match_start_index, SmiFromIntPtr(search_length));

  // Accumulates prefix + replacement; the suffix is appended at {out}.
  TVARIABLE(String, var_result, EmptyStringConstant());

  // Compute the prefix.
  {
    Label next(this);

    // A match at index 0 has an empty prefix; keep the empty string.
    GotoIf(SmiEqual(match_start_index, smi_zero), &next);
    const TNode<String> prefix =
        CAST(CallBuiltin(Builtin::kStringSubstring, context, subject_string,
                         IntPtrConstant(0), SmiUntag(match_start_index)));
    var_result = prefix;

    Goto(&next);
    BIND(&next);
  }

  // Compute the string to replace with.

  Label if_iscallablereplace(this), if_notcallablereplace(this);
  GotoIf(TaggedIsSmi(replace), &if_notcallablereplace);
  Branch(IsCallableMap(LoadMap(CAST(replace))), &if_iscallablereplace,
         &if_notcallablereplace);

  BIND(&if_iscallablereplace);
  {
    // Call replace(search_string, match_start_index, subject_string) with an
    // undefined receiver and stringify the result.
    const TNode<Object> replacement =
        Call(context, replace, UndefinedConstant(), search_string,
             match_start_index, subject_string);
    const TNode<String> replacement_string =
        ToString_Inline(context, replacement);
    var_result = CAST(CallBuiltin(Builtin::kStringAdd_CheckNone, context,
                                  var_result.value(), replacement_string));
    Goto(&out);
  }

  BIND(&if_notcallablereplace);
  {
    // Stringify {replace} and let GetSubstitution handle any '$' in it.
    const TNode<String> replace_string = ToString_Inline(context, replace);
    const TNode<Object> replacement =
        GetSubstitution(context, subject_string, match_start_index,
                        match_end_index, replace_string);
    var_result = CAST(CallBuiltin(Builtin::kStringAdd_CheckNone, context,
                                  var_result.value(), replacement));
    Goto(&out);
  }

  BIND(&out);
  {
    // Append the part of the subject that follows the match.
    const TNode<Object> suffix =
        CallBuiltin(Builtin::kStringSubstring, context, subject_string,
                    SmiUntag(match_end_index), subject_length);
    const TNode<Object> result = CallBuiltin(
        Builtin::kStringAdd_CheckNone, context, var_result.value(), suffix);
    Return(result);
  }
}
1145
// ES #sec-string.prototype.matchAll
//
// Outline of the emitted code:
//  * RequireObjectCoercible on the receiver.
//  * If {regexp} is a RegExp, verify that its flags contain "g"; throw a
//    TypeError otherwise (or if the flags are null/undefined).
//  * If {regexp} is a fast RegExp and the receiver a String, return
//    RegExpPrototypeMatchAllImpl(...); otherwise, if {regexp[@@matchAll]} is
//    neither undefined nor null, call it.
//  * Otherwise create a RegExp from {regexp} with flags "g" and invoke its
//    @@matchAll method on ToString(receiver).
TF_BUILTIN(StringPrototypeMatchAll, StringBuiltinsAssembler) {
  char const* method_name = "String.prototype.matchAll";

  auto context = Parameter<Context>(Descriptor::kContext);
  auto maybe_regexp = Parameter<Object>(Descriptor::kRegexp);
  auto receiver = Parameter<Object>(Descriptor::kReceiver);
  TNode<NativeContext> native_context = LoadNativeContext(context);

  // 1. Let O be ? RequireObjectCoercible(this value).
  RequireObjectCoercible(context, receiver, method_name);

  RegExpMatchAllAssembler regexp_asm(state());
  {
    Label fast(this), slow(this, Label::kDeferred),
        throw_exception(this, Label::kDeferred),
        throw_flags_exception(this, Label::kDeferred), next(this);

    // 2. If regexp is neither undefined nor null, then
    //   a. Let isRegExp be ? IsRegExp(regexp).
    //   b. If isRegExp is true, then
    //     i. Let flags be ? Get(regexp, "flags").
    //    ii. Perform ? RequireObjectCoercible(flags).
    //   iii. If ? ToString(flags) does not contain "g", throw a
    //        TypeError exception.
    // A Smi cannot be a RegExp, so the flags check is skipped for it.
    GotoIf(TaggedIsSmi(maybe_regexp), &next);
    TNode<HeapObject> heap_maybe_regexp = CAST(maybe_regexp);
    regexp_asm.BranchIfFastRegExpForMatch(context, heap_maybe_regexp, &fast,
                                          &slow);

    BIND(&fast);
    {
      // Fast RegExp: query the global flag through FlagGetter instead of
      // going through the "flags" property.
      TNode<BoolT> is_global = regexp_asm.FlagGetter(context, heap_maybe_regexp,
                                                     JSRegExp::kGlobal, true);
      Branch(is_global, &next, &throw_exception);
    }

    BIND(&slow);
    {
      // Not a RegExp at all: no flags check required.
      GotoIfNot(regexp_asm.IsRegExp(native_context, heap_maybe_regexp), &next);

      TNode<Object> flags = GetProperty(context, heap_maybe_regexp,
                                        isolate()->factory()->flags_string());
      // TODO(syg): Implement a RequireObjectCoercible with more flexible error
      // messages.
      GotoIf(IsNullOrUndefined(flags), &throw_flags_exception);

      // Search the stringified flags for "g"; StringIndexOf yields -1 when it
      // is absent.
      TNode<String> flags_string = ToString_Inline(context, flags);
      TNode<String> global_char_string = StringConstant("g");
      TNode<Smi> global_ix =
          CAST(CallBuiltin(Builtin::kStringIndexOf, context, flags_string,
                           global_char_string, SmiConstant(0)));
      Branch(SmiEqual(global_ix, SmiConstant(-1)), &throw_exception, &next);
    }

    BIND(&throw_exception);
    ThrowTypeError(context, MessageTemplate::kRegExpGlobalInvokedOnNonGlobal,
                   method_name);

    BIND(&throw_flags_exception);
    ThrowTypeError(context,
                   MessageTemplate::kStringMatchAllNullOrUndefinedFlags);

    BIND(&next);
  }
  //   a. Let matcher be ? GetMethod(regexp, @@matchAll).
  //   b. If matcher is not undefined, then
  //     i. Return ? Call(matcher, regexp, « O »).
  auto if_regexp_call = [&] {
    // MaybeCallFunctionAtSymbol guarantees fast path is chosen only if
    // maybe_regexp is a fast regexp and receiver is a string.
    TNode<String> s = CAST(receiver);

    Return(
        RegExpPrototypeMatchAllImpl(context, native_context, maybe_regexp, s));
  };
  auto if_generic_call = [=](TNode<Object> fn) {
    Return(Call(context, fn, maybe_regexp, receiver));
  };
  MaybeCallFunctionAtSymbol(
      context, maybe_regexp, receiver, isolate()->factory()->match_all_symbol(),
      DescriptorIndexNameValue{JSRegExp::kSymbolMatchAllFunctionDescriptorIndex,
                               RootIndex::kmatch_all_symbol,
                               Context::REGEXP_MATCH_ALL_FUNCTION_INDEX},
      if_regexp_call, if_generic_call);

  // 3. Let S be ? ToString(O).
  TNode<String> s = ToString_Inline(context, receiver);

  // 4. Let rx be ? RegExpCreate(R, "g").
  TNode<Object> rx = regexp_asm.RegExpCreate(context, native_context,
                                             maybe_regexp, StringConstant("g"));

  // 5. Return ? Invoke(rx, @@matchAll, « S »).
  TNode<Object> match_all_func =
      GetProperty(context, rx, isolate()->factory()->match_all_symbol());
  Return(Call(context, match_all_func, rx, s));
}
1244
// Returns a JSArray holding the characters of {subject_string} as
// single-character strings.
//
// The inline fast path handles one-byte strings only. It allocates a
// FixedArray of min(limit_number, subject_length) elements (just
// subject_length when {limit_number} is not a Smi) and fills it from the
// single character string cache. Two-byte strings, strings that cannot be
// turned into a direct string, and strings containing a character missing
// from the cache are handled by Runtime::kStringToArray instead.
//
// {subject_length} must be greater than zero (checked in debug builds).
TNode<JSArray> StringBuiltinsAssembler::StringToArray(
    TNode<NativeContext> context, TNode<String> subject_string,
    TNode<Smi> subject_length, TNode<Number> limit_number) {
  CSA_DCHECK(this, SmiGreaterThan(subject_length, SmiConstant(0)));

  Label done(this), call_runtime(this, Label::kDeferred),
      fill_thehole_and_call_runtime(this, Label::kDeferred);
  TVARIABLE(JSArray, result_array);

  TNode<Uint16T> instance_type = LoadInstanceType(subject_string);
  GotoIfNot(IsOneByteStringInstanceType(instance_type), &call_runtime);

  // Try to use cached one byte characters.
  {
    // Number of elements to produce: capped by {limit_number} when that is a
    // Smi, otherwise the full subject length.
    TNode<Smi> length_smi =
        Select<Smi>(TaggedIsSmi(limit_number),
                    [=] { return SmiMin(CAST(limit_number), subject_length); },
                    [=] { return subject_length; });
    TNode<IntPtrT> length = SmiToIntPtr(length_smi);

    ToDirectStringAssembler to_direct(state(), subject_string);
    to_direct.TryToDirect(&call_runtime);

    // The extracted direct string may be two-byte even though the wrapping
    // string is one-byte.
    GotoIfNot(IsOneByteStringInstanceType(to_direct.instance_type()),
              &call_runtime);

    TNode<FixedArray> elements = CAST(AllocateFixedArray(
        PACKED_ELEMENTS, length, AllocationFlag::kAllowLargeObjectAllocation));
    // Don't allocate anything while {string_data} is live!
    TNode<RawPtrT> string_data =
        to_direct.PointerToData(&fill_thehole_and_call_runtime);
    TNode<IntPtrT> string_data_offset = to_direct.offset();
    TNode<FixedArray> cache = SingleCharacterStringCacheConstant();

    BuildFastLoop<IntPtrT>(
        IntPtrConstant(0), length,
        [&](TNode<IntPtrT> index) {
          // TODO(jkummerow): Implement a CSA version of
          // DisallowGarbageCollection and use that to guard
          // ToDirectStringAssembler.PointerToData().
          CSA_DCHECK(this, WordEqual(to_direct.PointerToData(&call_runtime),
                                     string_data));
          // Read the one-byte character and use it as the cache index.
          TNode<Int32T> char_code =
              UncheckedCast<Int32T>(Load(MachineType::Uint8(), string_data,
                                         IntPtrAdd(index, string_data_offset)));
          TNode<UintPtrT> code_index = ChangeUint32ToWord(char_code);
          TNode<Object> entry = LoadFixedArrayElement(cache, code_index);

          // If we cannot find a char in the cache, fill the hole for the fixed
          // array, and call runtime.
          GotoIf(IsUndefined(entry), &fill_thehole_and_call_runtime);

          StoreFixedArrayElement(elements, index, entry);
        },
        1, IndexAdvanceMode::kPost);

    TNode<Map> array_map = LoadJSArrayElementsMap(PACKED_ELEMENTS, context);
    result_array = AllocateJSArray(array_map, elements, length_smi);
    Goto(&done);

    BIND(&fill_thehole_and_call_runtime);
    {
      // NOTE(review): presumably this leaves the abandoned, partly filled
      // {elements} array in a consistent state before the runtime call --
      // confirm.
      FillFixedArrayWithValue(PACKED_ELEMENTS, elements, IntPtrConstant(0),
                              length, RootIndex::kTheHoleValue);
      Goto(&call_runtime);
    }
  }

  BIND(&call_runtime);
  {
    result_array = CAST(CallRuntime(Runtime::kStringToArray, context,
                                    subject_string, limit_number));
    Goto(&done);
  }

  BIND(&done);
  return result_array.value();
}
1325
// ES6 section 21.1.3.19 String.prototype.split ( separator, limit )
//
// Outline of the emitted code:
//  * RequireObjectCoercible on the receiver.
//  * If {separator} is a fast RegExp (and the receiver a String), delegate to
//    the RegExpSplit builtin; otherwise, if {separator[@@split]} is neither
//    undefined nor null, call it.
//  * Convert receiver, limit and separator (in that order).
//  * limit == 0                    -> empty array.
//  * separator is undefined        -> one-element array holding the subject.
//  * separator string is empty     -> StringToArray (or an empty array for an
//                                     empty subject).
//  * anything else                 -> Runtime::kStringSplit.
TF_BUILTIN(StringPrototypeSplit, StringBuiltinsAssembler) {
  const int kSeparatorArg = 0;
  const int kLimitArg = 1;

  const TNode<IntPtrT> argc = ChangeInt32ToIntPtr(
      UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount));
  CodeStubArguments args(this, argc);

  TNode<Object> receiver = args.GetReceiver();
  const TNode<Object> separator = args.GetOptionalArgumentValue(kSeparatorArg);
  const TNode<Object> limit = args.GetOptionalArgumentValue(kLimitArg);
  auto context = Parameter<NativeContext>(Descriptor::kContext);

  TNode<Smi> smi_zero = SmiConstant(0);

  RequireObjectCoercible(context, receiver, "String.prototype.split");

  // Redirect to splitter method if {separator[@@split]} is not undefined.

  MaybeCallFunctionAtSymbol(
      context, separator, receiver, isolate()->factory()->split_symbol(),
      DescriptorIndexNameValue{JSRegExp::kSymbolSplitFunctionDescriptorIndex,
                               RootIndex::ksplit_symbol,
                               Context::REGEXP_SPLIT_FUNCTION_INDEX},
      [&]() {
        args.PopAndReturn(CallBuiltin(Builtin::kRegExpSplit, context, separator,
                                      receiver, limit));
      },
      [&](TNode<Object> fn) {
        args.PopAndReturn(Call(context, fn, separator, receiver, limit));
      });

  // String and integer conversions.

  TNode<String> subject_string = ToString_Inline(context, receiver);
  // An undefined limit becomes kMaxUInt32; anything else goes through
  // ToUint32.
  TNode<Number> limit_number = Select<Number>(
      IsUndefined(limit), [=] { return NumberConstant(kMaxUInt32); },
      [=] { return ToUint32(context, limit); });
  const TNode<String> separator_string = ToString_Inline(context, separator);

  Label return_empty_array(this);

  // Shortcut for {limit} == 0.
  GotoIf(TaggedEqual(limit_number, smi_zero), &return_empty_array);

  // ECMA-262 says that if {separator} is undefined, the result should
  // be an array of size 1 containing the entire string.
  {
    Label next(this);
    GotoIfNot(IsUndefined(separator), &next);

    const ElementsKind kind = PACKED_ELEMENTS;
    const TNode<NativeContext> native_context = LoadNativeContext(context);
    TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);

    TNode<Smi> length = SmiConstant(1);
    TNode<IntPtrT> capacity = IntPtrConstant(1);
    TNode<JSArray> result = AllocateJSArray(kind, array_map, capacity, length);

    // Store the whole subject as the single element.
    TNode<FixedArray> fixed_array = CAST(LoadElements(result));
    StoreFixedArrayElement(fixed_array, 0, subject_string);

    args.PopAndReturn(result);

    BIND(&next);
  }

  // If the separator string is empty then return the elements in the subject.
  {
    Label next(this);
    GotoIfNot(SmiEqual(LoadStringLengthAsSmi(separator_string), smi_zero),
              &next);

    // StringToArray requires a non-empty subject, so handle the empty one
    // here.
    TNode<Smi> subject_length = LoadStringLengthAsSmi(subject_string);
    GotoIf(SmiEqual(subject_length, smi_zero), &return_empty_array);

    args.PopAndReturn(
        StringToArray(context, subject_string, subject_length, limit_number));

    BIND(&next);
  }

  // General case: let the runtime do the splitting.
  const TNode<Object> result =
      CallRuntime(Runtime::kStringSplit, context, subject_string,
                  separator_string, limit_number);
  args.PopAndReturn(result);

  BIND(&return_empty_array);
  {
    const ElementsKind kind = PACKED_ELEMENTS;
    const TNode<NativeContext> native_context = LoadNativeContext(context);
    TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);

    TNode<Smi> length = smi_zero;
    TNode<IntPtrT> capacity = IntPtrConstant(0);
    TNode<JSArray> result_array =
        AllocateJSArray(kind, array_map, capacity, length);

    args.PopAndReturn(result_array);
  }
}
1428
// Builtin wrapper around SubString: returns SubString(string, from, to) for
// the String and the two untagged word-sized indices taken from the call
// descriptor.
TF_BUILTIN(StringSubstring, StringBuiltinsAssembler) {
  const TNode<String> subject = Parameter<String>(Descriptor::kString);
  const TNode<IntPtrT> start = UncheckedParameter<IntPtrT>(Descriptor::kFrom);
  const TNode<IntPtrT> end = UncheckedParameter<IntPtrT>(Descriptor::kTo);

  Return(SubString(subject, start, end));
}
1436
1437
// Return the |word32| codepoint at {index}. Supports SeqStrings and
// ExternalStrings.
//
// The code unit at {index} is returned as-is unless it is a lead surrogate
// (0xD800..0xDBFF), {index} + 1 is below {length}, and the following code unit
// is a trail surrogate (0xDC00..0xDFFF). In that case the pair is combined
// according to {encoding}:
//  * UTF16: both code units are packed into one word32 (trail in the upper 16
//    bits, lead in the lower 16 bits; swapped on big-endian targets).
//  * UTF32: the pair is decoded into the code point
//    (lead << 10) + trail + SURROGATE_OFFSET.
// TODO(v8:9880): Use UintPtrT here.
TNode<Int32T> StringBuiltinsAssembler::LoadSurrogatePairAt(
    TNode<String> string, TNode<IntPtrT> length, TNode<IntPtrT> index,
    UnicodeEncoding encoding) {
  Label handle_surrogate_pair(this), return_result(this);
  TVARIABLE(Int32T, var_result);
  TVARIABLE(Int32T, var_trail);
  var_result = StringCharCodeAt(string, Unsigned(index));
  var_trail = Int32Constant(0);

  // Not a lead surrogate: the single code unit is the result.
  GotoIf(Word32NotEqual(Word32And(var_result.value(), Int32Constant(0xFC00)),
                        Int32Constant(0xD800)),
         &return_result);
  TNode<IntPtrT> next_index = IntPtrAdd(index, IntPtrConstant(1));

  // Lead surrogate at the very end of the string: nothing to pair it with.
  GotoIfNot(IntPtrLessThan(next_index, length), &return_result);
  var_trail = StringCharCodeAt(string, Unsigned(next_index));
  // Only combine when the next code unit really is a trail surrogate.
  Branch(Word32Equal(Word32And(var_trail.value(), Int32Constant(0xFC00)),
                     Int32Constant(0xDC00)),
         &handle_surrogate_pair, &return_result);

  BIND(&handle_surrogate_pair);
  {
    TNode<Int32T> lead = var_result.value();
    TNode<Int32T> trail = var_trail.value();

    // Check that this path is only taken if a surrogate pair is found
    CSA_SLOW_DCHECK(this,
                    Uint32GreaterThanOrEqual(lead, Int32Constant(0xD800)));
    CSA_SLOW_DCHECK(this, Uint32LessThan(lead, Int32Constant(0xDC00)));
    CSA_SLOW_DCHECK(this,
                    Uint32GreaterThanOrEqual(trail, Int32Constant(0xDC00)));
    CSA_SLOW_DCHECK(this, Uint32LessThan(trail, Int32Constant(0xE000)));

    // {encoding} is a C++ value, so this switch is resolved while the code is
    // being generated; only the selected case emits instructions.
    switch (encoding) {
      case UnicodeEncoding::UTF16:
        var_result = Word32Or(
// Need to swap the order for big-endian platforms
#if V8_TARGET_BIG_ENDIAN
            Word32Shl(lead, Int32Constant(16)), trail);
#else
            Word32Shl(trail, Int32Constant(16)), lead);
#endif
        break;

      case UnicodeEncoding::UTF32: {
        // Convert UTF16 surrogate pair into |word32| code point, encoded as
        // UTF32.
        TNode<Int32T> surrogate_offset =
            Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);

        // (lead << 10) + trail + SURROGATE_OFFSET
        var_result = Int32Add(Word32Shl(lead, Int32Constant(10)),
                              Int32Add(trail, surrogate_offset));
        break;
      }
    }
    Goto(&return_result);
  }

  BIND(&return_result);
  return var_result.value();
}
1503
BranchIfStringPrimitiveWithNoCustomIteration(TNode<Object> object,TNode<Context> context,Label * if_true,Label * if_false)1504 void StringBuiltinsAssembler::BranchIfStringPrimitiveWithNoCustomIteration(
1505 TNode<Object> object, TNode<Context> context, Label* if_true,
1506 Label* if_false) {
1507 GotoIf(TaggedIsSmi(object), if_false);
1508 GotoIfNot(IsString(CAST(object)), if_false);
1509
1510 // Check that the String iterator hasn't been modified in a way that would
1511 // affect iteration.
1512 TNode<PropertyCell> protector_cell = StringIteratorProtectorConstant();
1513 DCHECK(isolate()->heap()->string_iterator_protector().IsPropertyCell());
1514 Branch(
1515 TaggedEqual(LoadObjectField(protector_cell, PropertyCell::kValueOffset),
1516 SmiConstant(Protectors::kProtectorValid)),
1517 if_true, if_false);
1518 }
1519
// Instantiate template due to shared library requirements.
//
// Explicit instantiation of CopyStringCharacters for a String source
// ({from_string} is a TNode<String>).
template V8_EXPORT_PRIVATE void StringBuiltinsAssembler::CopyStringCharacters(
    TNode<String> from_string, TNode<String> to_string,
    TNode<IntPtrT> from_index, TNode<IntPtrT> to_index,
    TNode<IntPtrT> character_count, String::Encoding from_encoding,
    String::Encoding to_encoding);

// Explicit instantiation of CopyStringCharacters for a raw-pointer source
// ({from_string} is a TNode<RawPtrT>).
template V8_EXPORT_PRIVATE void StringBuiltinsAssembler::CopyStringCharacters(
    TNode<RawPtrT> from_string, TNode<String> to_string,
    TNode<IntPtrT> from_index, TNode<IntPtrT> to_index,
    TNode<IntPtrT> character_count, String::Encoding from_encoding,
    String::Encoding to_encoding);
1532
1533 template <typename T>
CopyStringCharacters(TNode<T> from_string,TNode<String> to_string,TNode<IntPtrT> from_index,TNode<IntPtrT> to_index,TNode<IntPtrT> character_count,String::Encoding from_encoding,String::Encoding to_encoding)1534 void StringBuiltinsAssembler::CopyStringCharacters(
1535 TNode<T> from_string, TNode<String> to_string, TNode<IntPtrT> from_index,
1536 TNode<IntPtrT> to_index, TNode<IntPtrT> character_count,
1537 String::Encoding from_encoding, String::Encoding to_encoding) {
1538 // from_string could be either a String or a RawPtrT in the case we pass in
1539 // faked sequential strings when handling external subject strings.
1540 bool from_one_byte = from_encoding == String::ONE_BYTE_ENCODING;
1541 bool to_one_byte = to_encoding == String::ONE_BYTE_ENCODING;
1542 DCHECK_IMPLIES(to_one_byte, from_one_byte);
1543 Comment("CopyStringCharacters ",
1544 from_one_byte ? "ONE_BYTE_ENCODING" : "TWO_BYTE_ENCODING", " -> ",
1545 to_one_byte ? "ONE_BYTE_ENCODING" : "TWO_BYTE_ENCODING");
1546
1547 ElementsKind from_kind = from_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;
1548 ElementsKind to_kind = to_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;
1549 STATIC_ASSERT(SeqOneByteString::kHeaderSize == SeqTwoByteString::kHeaderSize);
1550 int header_size = SeqOneByteString::kHeaderSize - kHeapObjectTag;
1551 TNode<IntPtrT> from_offset =
1552 ElementOffsetFromIndex(from_index, from_kind, header_size);
1553 TNode<IntPtrT> to_offset =
1554 ElementOffsetFromIndex(to_index, to_kind, header_size);
1555 TNode<IntPtrT> byte_count =
1556 ElementOffsetFromIndex(character_count, from_kind);
1557 TNode<IntPtrT> limit_offset = IntPtrAdd(from_offset, byte_count);
1558
1559 // Prepare the fast loop
1560 MachineType type =
1561 from_one_byte ? MachineType::Uint8() : MachineType::Uint16();
1562 MachineRepresentation rep = to_one_byte ? MachineRepresentation::kWord8
1563 : MachineRepresentation::kWord16;
1564 int from_increment = 1 << ElementsKindToShiftSize(from_kind);
1565 int to_increment = 1 << ElementsKindToShiftSize(to_kind);
1566
1567 TVARIABLE(IntPtrT, current_to_offset, to_offset);
1568 VariableList vars({¤t_to_offset}, zone());
1569 int to_index_constant = 0, from_index_constant = 0;
1570 bool index_same = (from_encoding == to_encoding) &&
1571 (from_index == to_index ||
1572 (TryToInt32Constant(from_index, &from_index_constant) &&
1573 TryToInt32Constant(to_index, &to_index_constant) &&
1574 from_index_constant == to_index_constant));
1575 BuildFastLoop<IntPtrT>(
1576 vars, from_offset, limit_offset,
1577 [&](TNode<IntPtrT> offset) {
1578 StoreNoWriteBarrier(rep, to_string,
1579 index_same ? offset : current_to_offset.value(),
1580 Load(type, from_string, offset));
1581 if (!index_same) {
1582 Increment(¤t_to_offset, to_increment);
1583 }
1584 },
1585 from_increment, IndexAdvanceMode::kPost);
1586 }
1587
1588 // A wrapper around CopyStringCharacters which determines the correct string
1589 // encoding, allocates a corresponding sequential string, and then copies the
1590 // given character range using CopyStringCharacters.
1591 // |from_string| must be a sequential string.
1592 // 0 <= |from_index| <= |from_index| + |character_count| < from_string.length.
1593 template <typename T>
AllocAndCopyStringCharacters(TNode<T> from,TNode<Int32T> from_instance_type,TNode<IntPtrT> from_index,TNode<IntPtrT> character_count)1594 TNode<String> StringBuiltinsAssembler::AllocAndCopyStringCharacters(
1595 TNode<T> from, TNode<Int32T> from_instance_type, TNode<IntPtrT> from_index,
1596 TNode<IntPtrT> character_count) {
1597 Label end(this), one_byte_sequential(this), two_byte_sequential(this);
1598 TVARIABLE(String, var_result);
1599
1600 Branch(IsOneByteStringInstanceType(from_instance_type), &one_byte_sequential,
1601 &two_byte_sequential);
1602
1603 // The subject string is a sequential one-byte string.
1604 BIND(&one_byte_sequential);
1605 {
1606 TNode<String> result = AllocateSeqOneByteString(
1607 Unsigned(TruncateIntPtrToInt32(character_count)));
1608 CopyStringCharacters<T>(from, result, from_index, IntPtrConstant(0),
1609 character_count, String::ONE_BYTE_ENCODING,
1610 String::ONE_BYTE_ENCODING);
1611 var_result = result;
1612 Goto(&end);
1613 }
1614
1615 // The subject string is a sequential two-byte string.
1616 BIND(&two_byte_sequential);
1617 {
1618 TNode<String> result = AllocateSeqTwoByteString(
1619 Unsigned(TruncateIntPtrToInt32(character_count)));
1620 CopyStringCharacters<T>(from, result, from_index, IntPtrConstant(0),
1621 character_count, String::TWO_BYTE_ENCODING,
1622 String::TWO_BYTE_ENCODING);
1623 var_result = result;
1624 Goto(&end);
1625 }
1626
1627 BIND(&end);
1628 return var_result.value();
1629 }
1630
1631 // TODO(v8:9880): Use UintPtrT here.
SubString(TNode<String> string,TNode<IntPtrT> from,TNode<IntPtrT> to)1632 TNode<String> StringBuiltinsAssembler::SubString(TNode<String> string,
1633 TNode<IntPtrT> from,
1634 TNode<IntPtrT> to) {
1635 TVARIABLE(String, var_result);
1636 ToDirectStringAssembler to_direct(state(), string);
1637 Label end(this), runtime(this);
1638
1639 const TNode<IntPtrT> substr_length = IntPtrSub(to, from);
1640 const TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);
1641
1642 // Begin dispatching based on substring length.
1643
1644 Label original_string_or_invalid_length(this);
1645 GotoIf(UintPtrGreaterThanOrEqual(substr_length, string_length),
1646 &original_string_or_invalid_length);
1647
1648 // A real substring (substr_length < string_length).
1649 Label empty(this);
1650 GotoIf(IntPtrEqual(substr_length, IntPtrConstant(0)), &empty);
1651
1652 Label single_char(this);
1653 GotoIf(IntPtrEqual(substr_length, IntPtrConstant(1)), &single_char);
1654
1655 // Deal with different string types: update the index if necessary
1656 // and extract the underlying string.
1657
1658 TNode<String> direct_string = to_direct.TryToDirect(&runtime);
1659 TNode<IntPtrT> offset = IntPtrAdd(from, to_direct.offset());
1660 const TNode<Int32T> instance_type = to_direct.instance_type();
1661
1662 // The subject string can only be external or sequential string of either
1663 // encoding at this point.
1664 Label external_string(this);
1665 {
1666 if (FLAG_string_slices) {
1667 Label next(this);
1668
1669 // Short slice. Copy instead of slicing.
1670 GotoIf(IntPtrLessThan(substr_length,
1671 IntPtrConstant(SlicedString::kMinLength)),
1672 &next);
1673
1674 // Allocate new sliced string.
1675
1676 Counters* counters = isolate()->counters();
1677 IncrementCounter(counters->sub_string_native(), 1);
1678
1679 Label one_byte_slice(this), two_byte_slice(this);
1680 Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
1681 &one_byte_slice, &two_byte_slice);
1682
1683 BIND(&one_byte_slice);
1684 {
1685 var_result = AllocateSlicedOneByteString(
1686 Unsigned(TruncateIntPtrToInt32(substr_length)), direct_string,
1687 SmiTag(offset));
1688 Goto(&end);
1689 }
1690
1691 BIND(&two_byte_slice);
1692 {
1693 var_result = AllocateSlicedTwoByteString(
1694 Unsigned(TruncateIntPtrToInt32(substr_length)), direct_string,
1695 SmiTag(offset));
1696 Goto(&end);
1697 }
1698
1699 BIND(&next);
1700 }
1701
1702 // The subject string can only be external or sequential string of either
1703 // encoding at this point.
1704 GotoIf(to_direct.is_external(), &external_string);
1705
1706 var_result = AllocAndCopyStringCharacters(direct_string, instance_type,
1707 offset, substr_length);
1708
1709 Counters* counters = isolate()->counters();
1710 IncrementCounter(counters->sub_string_native(), 1);
1711
1712 Goto(&end);
1713 }
1714
1715 // Handle external string.
1716 BIND(&external_string);
1717 {
1718 const TNode<RawPtrT> fake_sequential_string =
1719 to_direct.PointerToString(&runtime);
1720
1721 var_result = AllocAndCopyStringCharacters(
1722 fake_sequential_string, instance_type, offset, substr_length);
1723
1724 Counters* counters = isolate()->counters();
1725 IncrementCounter(counters->sub_string_native(), 1);
1726
1727 Goto(&end);
1728 }
1729
1730 BIND(&empty);
1731 {
1732 var_result = EmptyStringConstant();
1733 Goto(&end);
1734 }
1735
1736 // Substrings of length 1 are generated through CharCodeAt and FromCharCode.
1737 BIND(&single_char);
1738 {
1739 TNode<Int32T> char_code = StringCharCodeAt(string, Unsigned(from));
1740 var_result = StringFromSingleCharCode(char_code);
1741 Goto(&end);
1742 }
1743
1744 BIND(&original_string_or_invalid_length);
1745 {
1746 CSA_DCHECK(this, IntPtrEqual(substr_length, string_length));
1747
1748 // Equal length - check if {from, to} == {0, str.length}.
1749 GotoIf(UintPtrGreaterThan(from, IntPtrConstant(0)), &runtime);
1750
1751 // Return the original string (substr_length == string_length).
1752
1753 Counters* counters = isolate()->counters();
1754 IncrementCounter(counters->sub_string_native(), 1);
1755
1756 var_result = string;
1757 Goto(&end);
1758 }
1759
1760 // Fall back to a runtime call.
1761 BIND(&runtime);
1762 {
1763 var_result =
1764 CAST(CallRuntime(Runtime::kStringSubstring, NoContextConstant(), string,
1765 SmiTag(from), SmiTag(to)));
1766 Goto(&end);
1767 }
1768
1769 BIND(&end);
1770 return var_result.value();
1771 }
1772
1773 } // namespace internal
1774 } // namespace v8
1775