• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/builtins/builtins-string-gen.h"
6 
7 #include "src/base/strings.h"
8 #include "src/builtins/builtins-regexp-gen.h"
9 #include "src/builtins/builtins-utils-gen.h"
10 #include "src/builtins/builtins.h"
11 #include "src/codegen/code-factory.h"
12 #include "src/execution/protectors.h"
13 #include "src/heap/factory-inl.h"
14 #include "src/heap/heap-inl.h"
15 #include "src/logging/counters.h"
16 #include "src/objects/objects.h"
17 #include "src/objects/property-cell.h"
18 
19 namespace v8 {
20 namespace internal {
21 
DirectStringData(TNode<String> string,TNode<Word32T> string_instance_type)22 TNode<RawPtrT> StringBuiltinsAssembler::DirectStringData(
23     TNode<String> string, TNode<Word32T> string_instance_type) {
24   // Compute the effective offset of the first character.
25   TVARIABLE(RawPtrT, var_data);
26   Label if_sequential(this), if_external(this), if_join(this);
27   Branch(Word32Equal(Word32And(string_instance_type,
28                                Int32Constant(kStringRepresentationMask)),
29                      Int32Constant(kSeqStringTag)),
30          &if_sequential, &if_external);
31 
32   BIND(&if_sequential);
33   {
34     var_data = RawPtrAdd(
35         ReinterpretCast<RawPtrT>(BitcastTaggedToWord(string)),
36         IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
37     Goto(&if_join);
38   }
39 
40   BIND(&if_external);
41   {
42     var_data = LoadExternalStringResourceDataPtr(CAST(string));
43     Goto(&if_join);
44   }
45 
46   BIND(&if_join);
47   return var_data.value();
48 }
49 
50 template <typename SubjectChar, typename PatternChar>
CallSearchStringRaw(const TNode<RawPtrT> subject_ptr,const TNode<IntPtrT> subject_length,const TNode<RawPtrT> search_ptr,const TNode<IntPtrT> search_length,const TNode<IntPtrT> start_position)51 TNode<IntPtrT> StringBuiltinsAssembler::CallSearchStringRaw(
52     const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
53     const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
54     const TNode<IntPtrT> start_position) {
55   const TNode<ExternalReference> function_addr = ExternalConstant(
56       ExternalReference::search_string_raw<SubjectChar, PatternChar>());
57   const TNode<ExternalReference> isolate_ptr =
58       ExternalConstant(ExternalReference::isolate_address(isolate()));
59 
60   MachineType type_ptr = MachineType::Pointer();
61   MachineType type_intptr = MachineType::IntPtr();
62 
63   const TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
64       function_addr, type_intptr, std::make_pair(type_ptr, isolate_ptr),
65       std::make_pair(type_ptr, subject_ptr),
66       std::make_pair(type_intptr, subject_length),
67       std::make_pair(type_ptr, search_ptr),
68       std::make_pair(type_intptr, search_length),
69       std::make_pair(type_intptr, start_position)));
70 
71   return result;
72 }
// Searches for a one-byte pattern in a two-byte subject by forwarding to
// CallSearchStringRaw with base::uc16 subject characters and uint8_t pattern
// characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const base::uc16;
  using PatternChar = const uint8_t;
  const TNode<IntPtrT> result = CallSearchStringRaw<SubjectChar, PatternChar>(
      subject_ptr, subject_length, search_ptr, search_length, start_position);
  return result;
}
// Searches for a one-byte pattern in a one-byte subject by forwarding to
// CallSearchStringRaw with uint8_t characters on both sides.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const uint8_t;
  using PatternChar = const uint8_t;
  const TNode<IntPtrT> result = CallSearchStringRaw<SubjectChar, PatternChar>(
      subject_ptr, subject_length, search_ptr, search_length, start_position);
  return result;
}
// Searches for a two-byte pattern in a two-byte subject by forwarding to
// CallSearchStringRaw with base::uc16 characters on both sides.
TNode<IntPtrT> StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const base::uc16;
  using PatternChar = const base::uc16;
  const TNode<IntPtrT> result = CallSearchStringRaw<SubjectChar, PatternChar>(
      subject_ptr, subject_length, search_ptr, search_length, start_position);
  return result;
}
// Searches for a two-byte pattern in a one-byte subject by forwarding to
// CallSearchStringRaw with uint8_t subject characters and base::uc16 pattern
// characters.
TNode<IntPtrT> StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> search_length,
    const TNode<IntPtrT> start_position) {
  using SubjectChar = const uint8_t;
  using PatternChar = const base::uc16;
  const TNode<IntPtrT> result = CallSearchStringRaw<SubjectChar, PatternChar>(
      subject_ptr, subject_length, search_ptr, search_length, start_position);
  return result;
}
// Looks for the single byte stored at {search_ptr} inside the one-byte
// subject, beginning at {start_position}, by calling libc memchr over the
// remaining {subject_length} - {start_position} bytes. Returns the match
// position relative to {subject_ptr}, or -1 when memchr returns null.
TNode<IntPtrT> StringBuiltinsAssembler::SearchOneByteInOneByteString(
    const TNode<RawPtrT> subject_ptr, const TNode<IntPtrT> subject_length,
    const TNode<RawPtrT> search_ptr, const TNode<IntPtrT> start_position) {
  const TNode<RawPtrT> scan_begin = RawPtrAdd(subject_ptr, start_position);
  const TNode<IntPtrT> needle = ChangeInt32ToIntPtr(Load<Uint8T>(search_ptr));
  const TNode<UintPtrT> bytes_to_scan =
      Unsigned(IntPtrSub(subject_length, start_position));

  const TNode<ExternalReference> memchr_ref =
      ExternalConstant(ExternalReference::libc_memchr_function());
  const TNode<RawPtrT> hit = UncheckedCast<RawPtrT>(
      CallCFunction(memchr_ref, MachineType::Pointer(),
                    std::make_pair(MachineType::Pointer(), scan_begin),
                    std::make_pair(MachineType::IntPtr(), needle),
                    std::make_pair(MachineType::UintPtr(), bytes_to_scan)));

  const auto not_found = [this]() -> TNode<IntPtrT> {
    return IntPtrConstant(-1);
  };
  // memchr reports an address; convert it back to an index from the start of
  // the subject.
  const auto found_index = [this, hit, scan_begin,
                            start_position]() -> TNode<IntPtrT> {
    return IntPtrAdd(RawPtrSub(hit, scan_begin), start_position);
  };
  return Select<IntPtrT>(WordEqual(hit, IntPtrConstant(0)), not_found,
                         found_index);
}
125 
GenerateStringEqual(TNode<String> left,TNode<String> right)126 void StringBuiltinsAssembler::GenerateStringEqual(TNode<String> left,
127                                                   TNode<String> right) {
128   TVARIABLE(String, var_left, left);
129   TVARIABLE(String, var_right, right);
130   Label if_equal(this), if_notequal(this), if_indirect(this, Label::kDeferred),
131       restart(this, {&var_left, &var_right});
132 
133   TNode<IntPtrT> lhs_length = LoadStringLengthAsWord(left);
134   TNode<IntPtrT> rhs_length = LoadStringLengthAsWord(right);
135 
136   // Strings with different lengths cannot be equal.
137   GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
138 
139   Goto(&restart);
140   BIND(&restart);
141   TNode<String> lhs = var_left.value();
142   TNode<String> rhs = var_right.value();
143 
144   TNode<Uint16T> lhs_instance_type = LoadInstanceType(lhs);
145   TNode<Uint16T> rhs_instance_type = LoadInstanceType(rhs);
146 
147   StringEqual_Core(lhs, lhs_instance_type, rhs, rhs_instance_type, lhs_length,
148                    &if_equal, &if_notequal, &if_indirect);
149 
150   BIND(&if_indirect);
151   {
152     // Try to unwrap indirect strings, restart the above attempt on success.
153     MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
154                               rhs_instance_type, &restart);
155 
156     TailCallRuntime(Runtime::kStringEqual, NoContextConstant(), lhs, rhs);
157   }
158 
159   BIND(&if_equal);
160   Return(TrueConstant());
161 
162   BIND(&if_notequal);
163   Return(FalseConstant());
164 }
165 
StringEqual_Core(TNode<String> lhs,TNode<Word32T> lhs_instance_type,TNode<String> rhs,TNode<Word32T> rhs_instance_type,TNode<IntPtrT> length,Label * if_equal,Label * if_not_equal,Label * if_indirect)166 void StringBuiltinsAssembler::StringEqual_Core(
167     TNode<String> lhs, TNode<Word32T> lhs_instance_type, TNode<String> rhs,
168     TNode<Word32T> rhs_instance_type, TNode<IntPtrT> length, Label* if_equal,
169     Label* if_not_equal, Label* if_indirect) {
170   CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(lhs), length));
171   CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(rhs), length));
172   // Fast check to see if {lhs} and {rhs} refer to the same String object.
173   GotoIf(TaggedEqual(lhs, rhs), if_equal);
174 
175   // Combine the instance types into a single 16-bit value, so we can check
176   // both of them at once.
177   TNode<Word32T> both_instance_types = Word32Or(
178       lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
179 
180   // Check if both {lhs} and {rhs} are internalized. Since we already know
181   // that they're not the same object, they're not equal in that case.
182   int const kBothInternalizedMask =
183       kIsNotInternalizedMask | (kIsNotInternalizedMask << 8);
184   int const kBothInternalizedTag = kInternalizedTag | (kInternalizedTag << 8);
185   GotoIf(Word32Equal(Word32And(both_instance_types,
186                                Int32Constant(kBothInternalizedMask)),
187                      Int32Constant(kBothInternalizedTag)),
188          if_not_equal);
189 
190   // Check if both {lhs} and {rhs} are direct strings, and that in case of
191   // ExternalStrings the data pointer is cached.
192   STATIC_ASSERT(kUncachedExternalStringTag != 0);
193   STATIC_ASSERT(kIsIndirectStringTag != 0);
194   int const kBothDirectStringMask =
195       kIsIndirectStringMask | kUncachedExternalStringMask |
196       ((kIsIndirectStringMask | kUncachedExternalStringMask) << 8);
197   GotoIfNot(Word32Equal(Word32And(both_instance_types,
198                                   Int32Constant(kBothDirectStringMask)),
199                         Int32Constant(0)),
200             if_indirect);
201 
202   // Dispatch based on the {lhs} and {rhs} string encoding.
203   int const kBothStringEncodingMask =
204       kStringEncodingMask | (kStringEncodingMask << 8);
205   int const kOneOneByteStringTag = kOneByteStringTag | (kOneByteStringTag << 8);
206   int const kTwoTwoByteStringTag = kTwoByteStringTag | (kTwoByteStringTag << 8);
207   int const kOneTwoByteStringTag = kOneByteStringTag | (kTwoByteStringTag << 8);
208   Label if_oneonebytestring(this), if_twotwobytestring(this),
209       if_onetwobytestring(this), if_twoonebytestring(this);
210   TNode<Word32T> masked_instance_types =
211       Word32And(both_instance_types, Int32Constant(kBothStringEncodingMask));
212   GotoIf(
213       Word32Equal(masked_instance_types, Int32Constant(kOneOneByteStringTag)),
214       &if_oneonebytestring);
215   GotoIf(
216       Word32Equal(masked_instance_types, Int32Constant(kTwoTwoByteStringTag)),
217       &if_twotwobytestring);
218   Branch(
219       Word32Equal(masked_instance_types, Int32Constant(kOneTwoByteStringTag)),
220       &if_onetwobytestring, &if_twoonebytestring);
221 
222   BIND(&if_oneonebytestring);
223   StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
224                    rhs_instance_type, MachineType::Uint8(), length, if_equal,
225                    if_not_equal);
226 
227   BIND(&if_twotwobytestring);
228   StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
229                    rhs_instance_type, MachineType::Uint16(), length, if_equal,
230                    if_not_equal);
231 
232   BIND(&if_onetwobytestring);
233   StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
234                    rhs_instance_type, MachineType::Uint16(), length, if_equal,
235                    if_not_equal);
236 
237   BIND(&if_twoonebytestring);
238   StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
239                    rhs_instance_type, MachineType::Uint8(), length, if_equal,
240                    if_not_equal);
241 }
242 
StringEqual_Loop(TNode<String> lhs,TNode<Word32T> lhs_instance_type,MachineType lhs_type,TNode<String> rhs,TNode<Word32T> rhs_instance_type,MachineType rhs_type,TNode<IntPtrT> length,Label * if_equal,Label * if_not_equal)243 void StringBuiltinsAssembler::StringEqual_Loop(
244     TNode<String> lhs, TNode<Word32T> lhs_instance_type, MachineType lhs_type,
245     TNode<String> rhs, TNode<Word32T> rhs_instance_type, MachineType rhs_type,
246     TNode<IntPtrT> length, Label* if_equal, Label* if_not_equal) {
247   CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(lhs), length));
248   CSA_DCHECK(this, WordEqual(LoadStringLengthAsWord(rhs), length));
249 
250   // Compute the effective offset of the first character.
251   TNode<RawPtrT> lhs_data = DirectStringData(lhs, lhs_instance_type);
252   TNode<RawPtrT> rhs_data = DirectStringData(rhs, rhs_instance_type);
253 
254   // Loop over the {lhs} and {rhs} strings to see if they are equal.
255   TVARIABLE(IntPtrT, var_offset, IntPtrConstant(0));
256   Label loop(this, &var_offset);
257   Goto(&loop);
258   BIND(&loop);
259   {
260     // If {offset} equals {end}, no difference was found, so the
261     // strings are equal.
262     GotoIf(WordEqual(var_offset.value(), length), if_equal);
263 
264     // Load the next characters from {lhs} and {rhs}.
265     TNode<Word32T> lhs_value = UncheckedCast<Word32T>(
266         Load(lhs_type, lhs_data,
267              WordShl(var_offset.value(),
268                      ElementSizeLog2Of(lhs_type.representation()))));
269     TNode<Word32T> rhs_value = UncheckedCast<Word32T>(
270         Load(rhs_type, rhs_data,
271              WordShl(var_offset.value(),
272                      ElementSizeLog2Of(rhs_type.representation()))));
273 
274     // Check if the characters match.
275     GotoIf(Word32NotEqual(lhs_value, rhs_value), if_not_equal);
276 
277     // Advance to next character.
278     var_offset = IntPtrAdd(var_offset.value(), IntPtrConstant(1));
279     Goto(&loop);
280   }
281 }
282 
StringFromSingleUTF16EncodedCodePoint(TNode<Int32T> codepoint)283 TNode<String> StringBuiltinsAssembler::StringFromSingleUTF16EncodedCodePoint(
284     TNode<Int32T> codepoint) {
285   TVARIABLE(String, var_result, EmptyStringConstant());
286 
287   Label if_isword16(this), if_isword32(this), return_result(this);
288 
289   Branch(Uint32LessThan(codepoint, Int32Constant(0x10000)), &if_isword16,
290          &if_isword32);
291 
292   BIND(&if_isword16);
293   {
294     var_result = StringFromSingleCharCode(codepoint);
295     Goto(&return_result);
296   }
297 
298   BIND(&if_isword32);
299   {
300     TNode<String> value = AllocateSeqTwoByteString(2);
301     StoreNoWriteBarrier(
302         MachineRepresentation::kWord32, value,
303         IntPtrConstant(SeqTwoByteString::kHeaderSize - kHeapObjectTag),
304         codepoint);
305     var_result = value;
306     Goto(&return_result);
307   }
308 
309   BIND(&return_result);
310   return var_result.value();
311 }
312 
AllocateConsString(TNode<Uint32T> length,TNode<String> left,TNode<String> right)313 TNode<String> StringBuiltinsAssembler::AllocateConsString(TNode<Uint32T> length,
314                                                           TNode<String> left,
315                                                           TNode<String> right) {
316   // Added string can be a cons string.
317   Comment("Allocating ConsString");
318   TNode<Int32T> left_instance_type = LoadInstanceType(left);
319   TNode<Int32T> right_instance_type = LoadInstanceType(right);
320 
321   // Determine the resulting ConsString map to use depending on whether
322   // any of {left} or {right} has two byte encoding.
323   STATIC_ASSERT(kOneByteStringTag != 0);
324   STATIC_ASSERT(kTwoByteStringTag == 0);
325   TNode<Int32T> combined_instance_type =
326       Word32And(left_instance_type, right_instance_type);
327   TNode<Map> result_map = CAST(Select<Object>(
328       IsSetWord32(combined_instance_type, kStringEncodingMask),
329       [=] { return ConsOneByteStringMapConstant(); },
330       [=] { return ConsStringMapConstant(); }));
331   TNode<HeapObject> result = AllocateInNewSpace(ConsString::kSize);
332   StoreMapNoWriteBarrier(result, result_map);
333   StoreObjectFieldNoWriteBarrier(result, ConsString::kLengthOffset, length);
334   StoreObjectFieldNoWriteBarrier(result, ConsString::kRawHashFieldOffset,
335                                  Int32Constant(String::kEmptyHashField));
336   StoreObjectFieldNoWriteBarrier(result, ConsString::kFirstOffset, left);
337   StoreObjectFieldNoWriteBarrier(result, ConsString::kSecondOffset, right);
338   return CAST(result);
339 }
340 
StringAdd(TNode<ContextOrEmptyContext> context,TNode<String> left,TNode<String> right)341 TNode<String> StringBuiltinsAssembler::StringAdd(
342     TNode<ContextOrEmptyContext> context, TNode<String> left,
343     TNode<String> right) {
344   CSA_DCHECK(this, IsZeroOrContext(context));
345 
346   TVARIABLE(String, result);
347   Label check_right(this), runtime(this, Label::kDeferred), cons(this),
348       done(this, &result), done_native(this, &result);
349   Counters* counters = isolate()->counters();
350 
351   TNode<Uint32T> left_length = LoadStringLengthAsWord32(left);
352   GotoIfNot(Word32Equal(left_length, Uint32Constant(0)), &check_right);
353   result = right;
354   Goto(&done_native);
355 
356   BIND(&check_right);
357   TNode<Uint32T> right_length = LoadStringLengthAsWord32(right);
358   GotoIfNot(Word32Equal(right_length, Uint32Constant(0)), &cons);
359   result = left;
360   Goto(&done_native);
361 
362   BIND(&cons);
363   {
364     TNode<Uint32T> new_length = Uint32Add(left_length, right_length);
365 
366     // If new length is greater than String::kMaxLength, goto runtime to
367     // throw. Note: we also need to invalidate the string length protector, so
368     // can't just throw here directly.
369     GotoIf(Uint32GreaterThan(new_length, Uint32Constant(String::kMaxLength)),
370            &runtime);
371 
372     TVARIABLE(String, var_left, left);
373     TVARIABLE(String, var_right, right);
374     Label non_cons(this, {&var_left, &var_right});
375     Label slow(this, Label::kDeferred);
376     GotoIf(Uint32LessThan(new_length, Uint32Constant(ConsString::kMinLength)),
377            &non_cons);
378 
379     result =
380         AllocateConsString(new_length, var_left.value(), var_right.value());
381     Goto(&done_native);
382 
383     BIND(&non_cons);
384 
385     Comment("Full string concatenate");
386     TNode<Int32T> left_instance_type = LoadInstanceType(var_left.value());
387     TNode<Int32T> right_instance_type = LoadInstanceType(var_right.value());
388     // Compute intersection and difference of instance types.
389 
390     TNode<Int32T> ored_instance_types =
391         Word32Or(left_instance_type, right_instance_type);
392     TNode<Word32T> xored_instance_types =
393         Word32Xor(left_instance_type, right_instance_type);
394 
395     // Check if both strings have the same encoding and both are sequential.
396     GotoIf(IsSetWord32(xored_instance_types, kStringEncodingMask), &runtime);
397     GotoIf(IsSetWord32(ored_instance_types, kStringRepresentationMask), &slow);
398 
399     TNode<IntPtrT> word_left_length = Signed(ChangeUint32ToWord(left_length));
400     TNode<IntPtrT> word_right_length = Signed(ChangeUint32ToWord(right_length));
401 
402     Label two_byte(this);
403     GotoIf(Word32Equal(Word32And(ored_instance_types,
404                                  Int32Constant(kStringEncodingMask)),
405                        Int32Constant(kTwoByteStringTag)),
406            &two_byte);
407     // One-byte sequential string case
408     result = AllocateSeqOneByteString(new_length);
409     CopyStringCharacters(var_left.value(), result.value(), IntPtrConstant(0),
410                          IntPtrConstant(0), word_left_length,
411                          String::ONE_BYTE_ENCODING, String::ONE_BYTE_ENCODING);
412     CopyStringCharacters(var_right.value(), result.value(), IntPtrConstant(0),
413                          word_left_length, word_right_length,
414                          String::ONE_BYTE_ENCODING, String::ONE_BYTE_ENCODING);
415     Goto(&done_native);
416 
417     BIND(&two_byte);
418     {
419       // Two-byte sequential string case
420       result = AllocateSeqTwoByteString(new_length);
421       CopyStringCharacters(var_left.value(), result.value(), IntPtrConstant(0),
422                            IntPtrConstant(0), word_left_length,
423                            String::TWO_BYTE_ENCODING,
424                            String::TWO_BYTE_ENCODING);
425       CopyStringCharacters(var_right.value(), result.value(), IntPtrConstant(0),
426                            word_left_length, word_right_length,
427                            String::TWO_BYTE_ENCODING,
428                            String::TWO_BYTE_ENCODING);
429       Goto(&done_native);
430     }
431 
432     BIND(&slow);
433     {
434       // Try to unwrap indirect strings, restart the above attempt on success.
435       MaybeDerefIndirectStrings(&var_left, left_instance_type, &var_right,
436                                 right_instance_type, &non_cons);
437       Goto(&runtime);
438     }
439   }
440   BIND(&runtime);
441   {
442     result = CAST(CallRuntime(Runtime::kStringAdd, context, left, right));
443     Goto(&done);
444   }
445 
446   BIND(&done_native);
447   {
448     IncrementCounter(counters->string_add_native(), 1);
449     Goto(&done);
450   }
451 
452   BIND(&done);
453   return result.value();
454 }
455 
BranchIfCanDerefIndirectString(TNode<String> string,TNode<Int32T> instance_type,Label * can_deref,Label * cannot_deref)456 void StringBuiltinsAssembler::BranchIfCanDerefIndirectString(
457     TNode<String> string, TNode<Int32T> instance_type, Label* can_deref,
458     Label* cannot_deref) {
459   TNode<Int32T> representation =
460       Word32And(instance_type, Int32Constant(kStringRepresentationMask));
461   GotoIf(Word32Equal(representation, Int32Constant(kThinStringTag)), can_deref);
462   GotoIf(Word32NotEqual(representation, Int32Constant(kConsStringTag)),
463          cannot_deref);
464   // Cons string.
465   TNode<String> rhs =
466       LoadObjectField<String>(string, ConsString::kSecondOffset);
467   GotoIf(IsEmptyString(rhs), can_deref);
468   Goto(cannot_deref);
469 }
470 
DerefIndirectString(TVariable<String> * var_string,TNode<Int32T> instance_type)471 void StringBuiltinsAssembler::DerefIndirectString(TVariable<String>* var_string,
472                                                   TNode<Int32T> instance_type) {
473 #ifdef DEBUG
474   Label can_deref(this), cannot_deref(this);
475   BranchIfCanDerefIndirectString(var_string->value(), instance_type, &can_deref,
476                                  &cannot_deref);
477   BIND(&cannot_deref);
478   DebugBreak();  // Should be able to dereference string.
479   Goto(&can_deref);
480   BIND(&can_deref);
481 #endif  // DEBUG
482 
483   STATIC_ASSERT(static_cast<int>(ThinString::kActualOffset) ==
484                 static_cast<int>(ConsString::kFirstOffset));
485   *var_string =
486       LoadObjectField<String>(var_string->value(), ThinString::kActualOffset);
487 }
488 
MaybeDerefIndirectString(TVariable<String> * var_string,TNode<Int32T> instance_type,Label * did_deref,Label * cannot_deref)489 void StringBuiltinsAssembler::MaybeDerefIndirectString(
490     TVariable<String>* var_string, TNode<Int32T> instance_type,
491     Label* did_deref, Label* cannot_deref) {
492   Label deref(this);
493   BranchIfCanDerefIndirectString(var_string->value(), instance_type, &deref,
494                                  cannot_deref);
495 
496   BIND(&deref);
497   {
498     DerefIndirectString(var_string, instance_type);
499     Goto(did_deref);
500   }
501 }
502 
MaybeDerefIndirectStrings(TVariable<String> * var_left,TNode<Int32T> left_instance_type,TVariable<String> * var_right,TNode<Int32T> right_instance_type,Label * did_something)503 void StringBuiltinsAssembler::MaybeDerefIndirectStrings(
504     TVariable<String>* var_left, TNode<Int32T> left_instance_type,
505     TVariable<String>* var_right, TNode<Int32T> right_instance_type,
506     Label* did_something) {
507   Label did_nothing_left(this), did_something_left(this),
508       didnt_do_anything(this);
509   MaybeDerefIndirectString(var_left, left_instance_type, &did_something_left,
510                            &did_nothing_left);
511 
512   BIND(&did_something_left);
513   {
514     MaybeDerefIndirectString(var_right, right_instance_type, did_something,
515                              did_something);
516   }
517 
518   BIND(&did_nothing_left);
519   {
520     MaybeDerefIndirectString(var_right, right_instance_type, did_something,
521                              &didnt_do_anything);
522   }
523 
524   BIND(&didnt_do_anything);
525   // Fall through if neither string was an indirect string.
526 }
527 
DerefIndirectString(TNode<String> string,TNode<Int32T> instance_type,Label * cannot_deref)528 TNode<String> StringBuiltinsAssembler::DerefIndirectString(
529     TNode<String> string, TNode<Int32T> instance_type, Label* cannot_deref) {
530   Label deref(this);
531   BranchIfCanDerefIndirectString(string, instance_type, &deref, cannot_deref);
532   BIND(&deref);
533   STATIC_ASSERT(static_cast<int>(ThinString::kActualOffset) ==
534                 static_cast<int>(ConsString::kFirstOffset));
535   return LoadObjectField<String>(string, ThinString::kActualOffset);
536 }
537 
// Builtin entry point that returns StringAdd(context, left, right) for two
// String parameters. The context parameter is read unchecked and must be
// zero or a context.
TF_BUILTIN(StringAdd_CheckNone, StringBuiltinsAssembler) {
  const TNode<String> left = Parameter<String>(Descriptor::kLeft);
  const TNode<String> right = Parameter<String>(Descriptor::kRight);
  const TNode<ContextOrEmptyContext> context =
      UncheckedParameter<ContextOrEmptyContext>(Descriptor::kContext);
  CSA_DCHECK(this, IsZeroOrContext(context));

  const TNode<String> sum = StringAdd(context, left, right);
  Return(sum);
}
546 
// Builtin entry point that untags the Smi parameters {from} and {to} and
// returns SubString(string, from, to).
TF_BUILTIN(SubString, StringBuiltinsAssembler) {
  const TNode<String> string = Parameter<String>(Descriptor::kString);
  const TNode<Smi> from = Parameter<Smi>(Descriptor::kFrom);
  const TNode<Smi> to = Parameter<Smi>(Descriptor::kTo);

  const TNode<IntPtrT> from_index = SmiUntag(from);
  const TNode<IntPtrT> to_index = SmiUntag(to);
  Return(SubString(string, from_index, to_index));
}
553 
GenerateStringRelationalComparison(TNode<String> left,TNode<String> right,Operation op)554 void StringBuiltinsAssembler::GenerateStringRelationalComparison(
555     TNode<String> left, TNode<String> right, Operation op) {
556   TVARIABLE(String, var_left, left);
557   TVARIABLE(String, var_right, right);
558 
559   Label if_less(this), if_equal(this), if_greater(this);
560   Label restart(this, {&var_left, &var_right});
561   Goto(&restart);
562   BIND(&restart);
563 
564   TNode<String> lhs = var_left.value();
565   TNode<String> rhs = var_right.value();
566   // Fast check to see if {lhs} and {rhs} refer to the same String object.
567   GotoIf(TaggedEqual(lhs, rhs), &if_equal);
568 
569   // Load instance types of {lhs} and {rhs}.
570   TNode<Uint16T> lhs_instance_type = LoadInstanceType(lhs);
571   TNode<Uint16T> rhs_instance_type = LoadInstanceType(rhs);
572 
573   // Combine the instance types into a single 16-bit value, so we can check
574   // both of them at once.
575   TNode<Int32T> both_instance_types = Word32Or(
576       lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
577 
578   // Check that both {lhs} and {rhs} are flat one-byte strings.
579   int const kBothSeqOneByteStringMask =
580       kStringEncodingMask | kStringRepresentationMask |
581       ((kStringEncodingMask | kStringRepresentationMask) << 8);
582   int const kBothSeqOneByteStringTag =
583       kOneByteStringTag | kSeqStringTag |
584       ((kOneByteStringTag | kSeqStringTag) << 8);
585   Label if_bothonebyteseqstrings(this), if_notbothonebyteseqstrings(this);
586   Branch(Word32Equal(Word32And(both_instance_types,
587                                Int32Constant(kBothSeqOneByteStringMask)),
588                      Int32Constant(kBothSeqOneByteStringTag)),
589          &if_bothonebyteseqstrings, &if_notbothonebyteseqstrings);
590 
591   BIND(&if_bothonebyteseqstrings);
592   {
593     // Load the length of {lhs} and {rhs}.
594     TNode<IntPtrT> lhs_length = LoadStringLengthAsWord(lhs);
595     TNode<IntPtrT> rhs_length = LoadStringLengthAsWord(rhs);
596 
597     // Determine the minimum length.
598     TNode<IntPtrT> length = IntPtrMin(lhs_length, rhs_length);
599 
600     // Compute the effective offset of the first character.
601     TNode<IntPtrT> begin =
602         IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag);
603 
604     // Compute the first offset after the string from the length.
605     TNode<IntPtrT> end = IntPtrAdd(begin, length);
606 
607     // Loop over the {lhs} and {rhs} strings to see if they are equal.
608     TVARIABLE(IntPtrT, var_offset, begin);
609     Label loop(this, &var_offset);
610     Goto(&loop);
611     BIND(&loop);
612     {
613       // Check if {offset} equals {end}.
614       Label if_done(this), if_notdone(this);
615       Branch(WordEqual(var_offset.value(), end), &if_done, &if_notdone);
616 
617       BIND(&if_notdone);
618       {
619         // Load the next characters from {lhs} and {rhs}.
620         TNode<Uint8T> lhs_value = Load<Uint8T>(lhs, var_offset.value());
621         TNode<Uint8T> rhs_value = Load<Uint8T>(rhs, var_offset.value());
622 
623         // Check if the characters match.
624         Label if_valueissame(this), if_valueisnotsame(this);
625         Branch(Word32Equal(lhs_value, rhs_value), &if_valueissame,
626                &if_valueisnotsame);
627 
628         BIND(&if_valueissame);
629         {
630           // Advance to next character.
631           var_offset = IntPtrAdd(var_offset.value(), IntPtrConstant(1));
632         }
633         Goto(&loop);
634 
635         BIND(&if_valueisnotsame);
636         Branch(Uint32LessThan(lhs_value, rhs_value), &if_less, &if_greater);
637       }
638 
639       BIND(&if_done);
640       {
641         // All characters up to the min length are equal, decide based on
642         // string length.
643         GotoIf(IntPtrEqual(lhs_length, rhs_length), &if_equal);
644         Branch(IntPtrLessThan(lhs_length, rhs_length), &if_less, &if_greater);
645       }
646     }
647   }
648 
649   BIND(&if_notbothonebyteseqstrings);
650   {
651     // Try to unwrap indirect strings, restart the above attempt on success.
652     MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
653                               rhs_instance_type, &restart);
654     // TODO(bmeurer): Add support for two byte string relational comparisons.
655     switch (op) {
656       case Operation::kLessThan:
657         TailCallRuntime(Runtime::kStringLessThan, NoContextConstant(), lhs,
658                         rhs);
659         break;
660       case Operation::kLessThanOrEqual:
661         TailCallRuntime(Runtime::kStringLessThanOrEqual, NoContextConstant(),
662                         lhs, rhs);
663         break;
664       case Operation::kGreaterThan:
665         TailCallRuntime(Runtime::kStringGreaterThan, NoContextConstant(), lhs,
666                         rhs);
667         break;
668       case Operation::kGreaterThanOrEqual:
669         TailCallRuntime(Runtime::kStringGreaterThanOrEqual, NoContextConstant(),
670                         lhs, rhs);
671         break;
672       default:
673         UNREACHABLE();
674     }
675   }
676 
677   BIND(&if_less);
678   switch (op) {
679     case Operation::kLessThan:
680     case Operation::kLessThanOrEqual:
681       Return(TrueConstant());
682       break;
683 
684     case Operation::kGreaterThan:
685     case Operation::kGreaterThanOrEqual:
686       Return(FalseConstant());
687       break;
688     default:
689       UNREACHABLE();
690   }
691 
692   BIND(&if_equal);
693   switch (op) {
694     case Operation::kLessThan:
695     case Operation::kGreaterThan:
696       Return(FalseConstant());
697       break;
698 
699     case Operation::kLessThanOrEqual:
700     case Operation::kGreaterThanOrEqual:
701       Return(TrueConstant());
702       break;
703     default:
704       UNREACHABLE();
705   }
706 
707   BIND(&if_greater);
708   switch (op) {
709     case Operation::kLessThan:
710     case Operation::kLessThanOrEqual:
711       Return(FalseConstant());
712       break;
713 
714     case Operation::kGreaterThan:
715     case Operation::kGreaterThanOrEqual:
716       Return(TrueConstant());
717       break;
718     default:
719       UNREACHABLE();
720   }
721 }
722 
// String equality builtin: fetches the two string operands from the call
// descriptor and delegates to GenerateStringEqual.
TF_BUILTIN(StringEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringEqual(lhs, rhs);
}
728 
// String less-than builtin: fetches both operands and lets
// GenerateStringRelationalComparison emit the comparison for
// Operation::kLessThan.
TF_BUILTIN(StringLessThan, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kLessThan);
}
734 
// String less-than-or-equal builtin: fetches both operands and lets
// GenerateStringRelationalComparison emit the comparison for
// Operation::kLessThanOrEqual.
TF_BUILTIN(StringLessThanOrEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kLessThanOrEqual);
}
740 
// String greater-than builtin: fetches both operands and lets
// GenerateStringRelationalComparison emit the comparison for
// Operation::kGreaterThan.
TF_BUILTIN(StringGreaterThan, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs, Operation::kGreaterThan);
}
746 
// String greater-than-or-equal builtin: fetches both operands and lets
// GenerateStringRelationalComparison emit the comparison for
// Operation::kGreaterThanOrEqual.
TF_BUILTIN(StringGreaterThanOrEqual, StringBuiltinsAssembler) {
  const TNode<String> lhs = Parameter<String>(Descriptor::kLeft);
  const TNode<String> rhs = Parameter<String>(Descriptor::kRight);
  GenerateStringRelationalComparison(lhs, rhs,
                                     Operation::kGreaterThanOrEqual);
}
753 
// Returns a new string holding the code point found at {position} in
// {receiver}. The code point is loaded in UTF-16 encoded form via
// LoadSurrogatePairAt and turned into a string by
// StringFromSingleUTF16EncodedCodePoint.
TF_BUILTIN(StringFromCodePointAt, StringBuiltinsAssembler) {
  const TNode<String> subject = Parameter<String>(Descriptor::kReceiver);
  const TNode<IntPtrT> index =
      UncheckedParameter<IntPtrT>(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  const TNode<IntPtrT> subject_length = LoadStringLengthAsWord(subject);

  // Fetch the UTF-16 encoded code point located at {index}.
  const TNode<Int32T> encoded_code_point = LoadSurrogatePairAt(
      subject, subject_length, index, UnicodeEncoding::UTF16);

  // Wrap the encoded code point in a String and return it.
  Return(StringFromSingleUTF16EncodedCodePoint(encoded_code_point));
}
767 
768 // -----------------------------------------------------------------------------
769 // ES6 section 21.1 String Objects
770 
771 // ES6 #sec-string.fromcharcode
TF_BUILTIN(StringFromCharCode,StringBuiltinsAssembler)772 TF_BUILTIN(StringFromCharCode, StringBuiltinsAssembler) {
773   // TODO(ishell): use constants from Descriptor once the JSFunction linkage
774   // arguments are reordered.
775   auto argc = UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount);
776   auto context = Parameter<Context>(Descriptor::kContext);
777 
778   CodeStubArguments arguments(this, argc);
779   TNode<Uint32T> unsigned_argc =
780       Unsigned(TruncateIntPtrToInt32(arguments.GetLengthWithoutReceiver()));
781   // Check if we have exactly one argument (plus the implicit receiver), i.e.
782   // if the parent frame is not an arguments adaptor frame.
783   Label if_oneargument(this), if_notoneargument(this);
784   Branch(IntPtrEqual(arguments.GetLengthWithoutReceiver(), IntPtrConstant(1)),
785          &if_oneargument, &if_notoneargument);
786 
787   BIND(&if_oneargument);
788   {
789     // Single argument case, perform fast single character string cache lookup
790     // for one-byte code units, or fall back to creating a single character
791     // string on the fly otherwise.
792     TNode<Object> code = arguments.AtIndex(0);
793     TNode<Word32T> code32 = TruncateTaggedToWord32(context, code);
794     TNode<Int32T> code16 =
795         Signed(Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit)));
796     TNode<String> result = StringFromSingleCharCode(code16);
797     arguments.PopAndReturn(result);
798   }
799 
800   TNode<Word32T> code16;
801   BIND(&if_notoneargument);
802   {
803     Label two_byte(this);
804     // Assume that the resulting string contains only one-byte characters.
805     TNode<String> one_byte_result = AllocateSeqOneByteString(unsigned_argc);
806 
807     TVARIABLE(IntPtrT, var_max_index, IntPtrConstant(0));
808 
809     // Iterate over the incoming arguments, converting them to 8-bit character
810     // codes. Stop if any of the conversions generates a code that doesn't fit
811     // in 8 bits.
812     CodeStubAssembler::VariableList vars({&var_max_index}, zone());
813     arguments.ForEach(vars, [&](TNode<Object> arg) {
814       TNode<Word32T> code32 = TruncateTaggedToWord32(context, arg);
815       code16 = Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));
816 
817       GotoIf(
818           Int32GreaterThan(code16, Int32Constant(String::kMaxOneByteCharCode)),
819           &two_byte);
820 
821       // The {code16} fits into the SeqOneByteString {one_byte_result}.
822       TNode<IntPtrT> offset = ElementOffsetFromIndex(
823           var_max_index.value(), UINT8_ELEMENTS,
824           SeqOneByteString::kHeaderSize - kHeapObjectTag);
825       StoreNoWriteBarrier(MachineRepresentation::kWord8, one_byte_result,
826                           offset, code16);
827       var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));
828     });
829     arguments.PopAndReturn(one_byte_result);
830 
831     BIND(&two_byte);
832 
833     // At least one of the characters in the string requires a 16-bit
834     // representation.  Allocate a SeqTwoByteString to hold the resulting
835     // string.
836     TNode<String> two_byte_result = AllocateSeqTwoByteString(unsigned_argc);
837 
838     // Copy the characters that have already been put in the 8-bit string into
839     // their corresponding positions in the new 16-bit string.
840     TNode<IntPtrT> zero = IntPtrConstant(0);
841     CopyStringCharacters(one_byte_result, two_byte_result, zero, zero,
842                          var_max_index.value(), String::ONE_BYTE_ENCODING,
843                          String::TWO_BYTE_ENCODING);
844 
845     // Write the character that caused the 8-bit to 16-bit fault.
846     TNode<IntPtrT> max_index_offset =
847         ElementOffsetFromIndex(var_max_index.value(), UINT16_ELEMENTS,
848                                SeqTwoByteString::kHeaderSize - kHeapObjectTag);
849     StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
850                         max_index_offset, code16);
851     var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));
852 
853     // Resume copying the passed-in arguments from the same place where the
854     // 8-bit copy stopped, but this time copying over all of the characters
855     // using a 16-bit representation.
856     arguments.ForEach(
857         vars,
858         [&](TNode<Object> arg) {
859           TNode<Word32T> code32 = TruncateTaggedToWord32(context, arg);
860           TNode<Word32T> code16 =
861               Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));
862 
863           TNode<IntPtrT> offset = ElementOffsetFromIndex(
864               var_max_index.value(), UINT16_ELEMENTS,
865               SeqTwoByteString::kHeaderSize - kHeapObjectTag);
866           StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
867                               offset, code16);
868           var_max_index = IntPtrAdd(var_max_index.value(), IntPtrConstant(1));
869         },
870         var_max_index.value());
871 
872     arguments.PopAndReturn(two_byte_result);
873   }
874 }
875 
MaybeCallFunctionAtSymbol(const TNode<Context> context,const TNode<Object> object,const TNode<Object> maybe_string,Handle<Symbol> symbol,DescriptorIndexNameValue additional_property_to_check,const NodeFunction0 & regexp_call,const NodeFunction1 & generic_call)876 void StringBuiltinsAssembler::MaybeCallFunctionAtSymbol(
877     const TNode<Context> context, const TNode<Object> object,
878     const TNode<Object> maybe_string, Handle<Symbol> symbol,
879     DescriptorIndexNameValue additional_property_to_check,
880     const NodeFunction0& regexp_call, const NodeFunction1& generic_call) {
881   Label out(this);
882   Label get_property_lookup(this);
883 
884   // Smis have to go through the GetProperty lookup in case Number.prototype or
885   // Object.prototype was modified.
886   GotoIf(TaggedIsSmi(object), &get_property_lookup);
887 
888   // Take the fast path for RegExps.
889   // There's two conditions: {object} needs to be a fast regexp, and
890   // {maybe_string} must be a string (we can't call ToString on the fast path
891   // since it may mutate {object}).
892   {
893     Label stub_call(this), slow_lookup(this);
894 
895     TNode<HeapObject> heap_object = CAST(object);
896 
897     GotoIf(TaggedIsSmi(maybe_string), &slow_lookup);
898     GotoIfNot(IsString(CAST(maybe_string)), &slow_lookup);
899 
900     // Note we don't run a full (= permissive) check here, because passing the
901     // check implies calling the fast variants of target builtins, which assume
902     // we've already made their appropriate fast path checks. This is not the
903     // case though; e.g.: some of the target builtins access flag getters.
904     // TODO(jgruber): Handle slow flag accesses on the fast path and make this
905     // permissive.
906     RegExpBuiltinsAssembler regexp_asm(state());
907     regexp_asm.BranchIfFastRegExp(
908         context, heap_object, LoadMap(heap_object),
909         PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
910         additional_property_to_check, &stub_call, &slow_lookup);
911 
912     BIND(&stub_call);
913     // TODO(jgruber): Add a no-JS scope once it exists.
914     regexp_call();
915 
916     BIND(&slow_lookup);
917     // Special case null and undefined to skip the property lookup.
918     Branch(IsNullOrUndefined(heap_object), &out, &get_property_lookup);
919   }
920 
921   // Fall back to a slow lookup of {heap_object[symbol]}.
922   //
923   // The spec uses GetMethod({heap_object}, {symbol}), which has a few quirks:
924   // * null values are turned into undefined, and
925   // * an exception is thrown if the value is not undefined, null, or callable.
926   // We handle the former by jumping to {out} for null values as well, while
927   // the latter is already handled by the Call({maybe_func}) operation.
928 
929   BIND(&get_property_lookup);
930   const TNode<Object> maybe_func = GetProperty(context, object, symbol);
931   GotoIf(IsUndefined(maybe_func), &out);
932   GotoIf(IsNull(maybe_func), &out);
933 
934   // Attempt to call the function.
935   generic_call(maybe_func);
936 
937   BIND(&out);
938 }
939 
// Returns, as a Smi, the result of running the StringIndexOf builtin on
// {string} with the single-character string "$" as the search string and 0 as
// the start position.
const TNode<Smi> StringBuiltinsAssembler::IndexOfDollarChar(
    const TNode<Context> context, const TNode<String> string) {
  const auto dollar_handle =
      isolate()->factory()->LookupSingleCharacterStringFromCode('$');
  const TNode<String> needle = HeapConstant(dollar_handle);
  const TNode<Smi> search_from = SmiConstant(0);

  const TNode<Smi> index = CAST(CallBuiltin(Builtin::kStringIndexOf, context,
                                            string, needle, search_from));
  return index;
}
948 
GetSubstitution(TNode<Context> context,TNode<String> subject_string,TNode<Smi> match_start_index,TNode<Smi> match_end_index,TNode<String> replace_string)949 TNode<String> StringBuiltinsAssembler::GetSubstitution(
950     TNode<Context> context, TNode<String> subject_string,
951     TNode<Smi> match_start_index, TNode<Smi> match_end_index,
952     TNode<String> replace_string) {
953   CSA_DCHECK(this, TaggedIsPositiveSmi(match_start_index));
954   CSA_DCHECK(this, TaggedIsPositiveSmi(match_end_index));
955 
956   TVARIABLE(String, var_result, replace_string);
957   Label runtime(this), out(this);
958 
959   // In this primitive implementation we simply look for the next '$' char in
960   // {replace_string}. If it doesn't exist, we can simply return
961   // {replace_string} itself. If it does, then we delegate to
962   // String::GetSubstitution, passing in the index of the first '$' to avoid
963   // repeated scanning work.
964   // TODO(jgruber): Possibly extend this in the future to handle more complex
965   // cases without runtime calls.
966 
967   const TNode<Smi> dollar_index = IndexOfDollarChar(context, replace_string);
968   Branch(SmiIsNegative(dollar_index), &out, &runtime);
969 
970   BIND(&runtime);
971   {
972     CSA_DCHECK(this, TaggedIsPositiveSmi(dollar_index));
973 
974     const TNode<Object> matched =
975         CallBuiltin(Builtin::kStringSubstring, context, subject_string,
976                     SmiUntag(match_start_index), SmiUntag(match_end_index));
977     const TNode<String> replacement_string = CAST(
978         CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
979                     match_start_index, replace_string, dollar_index));
980     var_result = replacement_string;
981 
982     Goto(&out);
983   }
984 
985   BIND(&out);
986   return var_result.value();
987 }
988 
989 // ES6 #sec-string.prototype.replace
TF_BUILTIN(StringPrototypeReplace,StringBuiltinsAssembler)990 TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
991   Label out(this);
992 
993   auto receiver = Parameter<Object>(Descriptor::kReceiver);
994   const auto search = Parameter<Object>(Descriptor::kSearch);
995   const auto replace = Parameter<Object>(Descriptor::kReplace);
996   auto context = Parameter<Context>(Descriptor::kContext);
997 
998   const TNode<Smi> smi_zero = SmiConstant(0);
999 
1000   RequireObjectCoercible(context, receiver, "String.prototype.replace");
1001 
1002   // Redirect to replacer method if {search[@@replace]} is not undefined.
1003 
1004   MaybeCallFunctionAtSymbol(
1005       context, search, receiver, isolate()->factory()->replace_symbol(),
1006       DescriptorIndexNameValue{JSRegExp::kSymbolReplaceFunctionDescriptorIndex,
1007                                RootIndex::kreplace_symbol,
1008                                Context::REGEXP_REPLACE_FUNCTION_INDEX},
1009       [=]() {
1010         Return(CallBuiltin(Builtin::kRegExpReplace, context, search, receiver,
1011                            replace));
1012       },
1013       [=](TNode<Object> fn) {
1014         Return(Call(context, fn, search, receiver, replace));
1015       });
1016 
1017   // Convert {receiver} and {search} to strings.
1018 
1019   const TNode<String> subject_string = ToString_Inline(context, receiver);
1020   const TNode<String> search_string = ToString_Inline(context, search);
1021 
1022   const TNode<IntPtrT> subject_length = LoadStringLengthAsWord(subject_string);
1023   const TNode<IntPtrT> search_length = LoadStringLengthAsWord(search_string);
1024 
1025   // Fast-path single-char {search}, long cons {receiver}, and simple string
1026   // {replace}.
1027   {
1028     Label next(this);
1029 
1030     GotoIfNot(WordEqual(search_length, IntPtrConstant(1)), &next);
1031     GotoIfNot(IntPtrGreaterThan(subject_length, IntPtrConstant(0xFF)), &next);
1032     GotoIf(TaggedIsSmi(replace), &next);
1033     GotoIfNot(IsString(CAST(replace)), &next);
1034 
1035     TNode<String> replace_string = CAST(replace);
1036     const TNode<Uint16T> subject_instance_type =
1037         LoadInstanceType(subject_string);
1038     GotoIfNot(IsConsStringInstanceType(subject_instance_type), &next);
1039 
1040     GotoIf(TaggedIsPositiveSmi(IndexOfDollarChar(context, replace_string)),
1041            &next);
1042 
1043     // Searching by traversing a cons string tree and replace with cons of
1044     // slices works only when the replaced string is a single character, being
1045     // replaced by a simple string and only pays off for long strings.
1046     // TODO(jgruber): Reevaluate if this is still beneficial.
1047     // TODO(jgruber): TailCallRuntime when it correctly handles adapter frames.
1048     Return(CallRuntime(Runtime::kStringReplaceOneCharWithString, context,
1049                        subject_string, search_string, replace_string));
1050 
1051     BIND(&next);
1052   }
1053 
1054   // TODO(jgruber): Extend StringIndexOf to handle two-byte strings and
1055   // longer substrings - we can handle up to 8 chars (one-byte) / 4 chars
1056   // (2-byte).
1057 
1058   const TNode<Smi> match_start_index =
1059       CAST(CallBuiltin(Builtin::kStringIndexOf, context, subject_string,
1060                        search_string, smi_zero));
1061 
1062   // Early exit if no match found.
1063   {
1064     Label next(this), return_subject(this);
1065 
1066     GotoIfNot(SmiIsNegative(match_start_index), &next);
1067 
1068     // The spec requires to perform ToString(replace) if the {replace} is not
1069     // callable even if we are going to exit here.
1070     // Since ToString() being applied to Smi does not have side effects for
1071     // numbers we can skip it.
1072     GotoIf(TaggedIsSmi(replace), &return_subject);
1073     GotoIf(IsCallableMap(LoadMap(CAST(replace))), &return_subject);
1074 
1075     // TODO(jgruber): Could introduce ToStringSideeffectsStub which only
1076     // performs observable parts of ToString.
1077     ToString_Inline(context, replace);
1078     Goto(&return_subject);
1079 
1080     BIND(&return_subject);
1081     Return(subject_string);
1082 
1083     BIND(&next);
1084   }
1085 
1086   const TNode<Smi> match_end_index =
1087       SmiAdd(match_start_index, SmiFromIntPtr(search_length));
1088 
1089   TVARIABLE(String, var_result, EmptyStringConstant());
1090 
1091   // Compute the prefix.
1092   {
1093     Label next(this);
1094 
1095     GotoIf(SmiEqual(match_start_index, smi_zero), &next);
1096     const TNode<String> prefix =
1097         CAST(CallBuiltin(Builtin::kStringSubstring, context, subject_string,
1098                          IntPtrConstant(0), SmiUntag(match_start_index)));
1099     var_result = prefix;
1100 
1101     Goto(&next);
1102     BIND(&next);
1103   }
1104 
1105   // Compute the string to replace with.
1106 
1107   Label if_iscallablereplace(this), if_notcallablereplace(this);
1108   GotoIf(TaggedIsSmi(replace), &if_notcallablereplace);
1109   Branch(IsCallableMap(LoadMap(CAST(replace))), &if_iscallablereplace,
1110          &if_notcallablereplace);
1111 
1112   BIND(&if_iscallablereplace);
1113   {
1114     const TNode<Object> replacement =
1115         Call(context, replace, UndefinedConstant(), search_string,
1116              match_start_index, subject_string);
1117     const TNode<String> replacement_string =
1118         ToString_Inline(context, replacement);
1119     var_result = CAST(CallBuiltin(Builtin::kStringAdd_CheckNone, context,
1120                                   var_result.value(), replacement_string));
1121     Goto(&out);
1122   }
1123 
1124   BIND(&if_notcallablereplace);
1125   {
1126     const TNode<String> replace_string = ToString_Inline(context, replace);
1127     const TNode<Object> replacement =
1128         GetSubstitution(context, subject_string, match_start_index,
1129                         match_end_index, replace_string);
1130     var_result = CAST(CallBuiltin(Builtin::kStringAdd_CheckNone, context,
1131                                   var_result.value(), replacement));
1132     Goto(&out);
1133   }
1134 
1135   BIND(&out);
1136   {
1137     const TNode<Object> suffix =
1138         CallBuiltin(Builtin::kStringSubstring, context, subject_string,
1139                     SmiUntag(match_end_index), subject_length);
1140     const TNode<Object> result = CallBuiltin(
1141         Builtin::kStringAdd_CheckNone, context, var_result.value(), suffix);
1142     Return(result);
1143   }
1144 }
1145 
1146 // ES #sec-string.prototype.matchAll
TF_BUILTIN(StringPrototypeMatchAll,StringBuiltinsAssembler)1147 TF_BUILTIN(StringPrototypeMatchAll, StringBuiltinsAssembler) {
1148   char const* method_name = "String.prototype.matchAll";
1149 
1150   auto context = Parameter<Context>(Descriptor::kContext);
1151   auto maybe_regexp = Parameter<Object>(Descriptor::kRegexp);
1152   auto receiver = Parameter<Object>(Descriptor::kReceiver);
1153   TNode<NativeContext> native_context = LoadNativeContext(context);
1154 
1155   // 1. Let O be ? RequireObjectCoercible(this value).
1156   RequireObjectCoercible(context, receiver, method_name);
1157 
1158   RegExpMatchAllAssembler regexp_asm(state());
1159   {
1160     Label fast(this), slow(this, Label::kDeferred),
1161         throw_exception(this, Label::kDeferred),
1162         throw_flags_exception(this, Label::kDeferred), next(this);
1163 
1164     // 2. If regexp is neither undefined nor null, then
1165     //   a. Let isRegExp be ? IsRegExp(regexp).
1166     //   b. If isRegExp is true, then
1167     //     i. Let flags be ? Get(regexp, "flags").
1168     //    ii. Perform ? RequireObjectCoercible(flags).
1169     //   iii. If ? ToString(flags) does not contain "g", throw a
1170     //        TypeError exception.
1171     GotoIf(TaggedIsSmi(maybe_regexp), &next);
1172     TNode<HeapObject> heap_maybe_regexp = CAST(maybe_regexp);
1173     regexp_asm.BranchIfFastRegExpForMatch(context, heap_maybe_regexp, &fast,
1174                                           &slow);
1175 
1176     BIND(&fast);
1177     {
1178       TNode<BoolT> is_global = regexp_asm.FlagGetter(context, heap_maybe_regexp,
1179                                                      JSRegExp::kGlobal, true);
1180       Branch(is_global, &next, &throw_exception);
1181     }
1182 
1183     BIND(&slow);
1184     {
1185       GotoIfNot(regexp_asm.IsRegExp(native_context, heap_maybe_regexp), &next);
1186 
1187       TNode<Object> flags = GetProperty(context, heap_maybe_regexp,
1188                                         isolate()->factory()->flags_string());
1189       // TODO(syg): Implement a RequireObjectCoercible with more flexible error
1190       // messages.
1191       GotoIf(IsNullOrUndefined(flags), &throw_flags_exception);
1192 
1193       TNode<String> flags_string = ToString_Inline(context, flags);
1194       TNode<String> global_char_string = StringConstant("g");
1195       TNode<Smi> global_ix =
1196           CAST(CallBuiltin(Builtin::kStringIndexOf, context, flags_string,
1197                            global_char_string, SmiConstant(0)));
1198       Branch(SmiEqual(global_ix, SmiConstant(-1)), &throw_exception, &next);
1199     }
1200 
1201     BIND(&throw_exception);
1202     ThrowTypeError(context, MessageTemplate::kRegExpGlobalInvokedOnNonGlobal,
1203                    method_name);
1204 
1205     BIND(&throw_flags_exception);
1206     ThrowTypeError(context,
1207                    MessageTemplate::kStringMatchAllNullOrUndefinedFlags);
1208 
1209     BIND(&next);
1210   }
1211   //   a. Let matcher be ? GetMethod(regexp, @@matchAll).
1212   //   b. If matcher is not undefined, then
1213   //     i. Return ? Call(matcher, regexp, « O »).
1214   auto if_regexp_call = [&] {
1215     // MaybeCallFunctionAtSymbol guarantees fast path is chosen only if
1216     // maybe_regexp is a fast regexp and receiver is a string.
1217     TNode<String> s = CAST(receiver);
1218 
1219     Return(
1220         RegExpPrototypeMatchAllImpl(context, native_context, maybe_regexp, s));
1221   };
1222   auto if_generic_call = [=](TNode<Object> fn) {
1223     Return(Call(context, fn, maybe_regexp, receiver));
1224   };
1225   MaybeCallFunctionAtSymbol(
1226       context, maybe_regexp, receiver, isolate()->factory()->match_all_symbol(),
1227       DescriptorIndexNameValue{JSRegExp::kSymbolMatchAllFunctionDescriptorIndex,
1228                                RootIndex::kmatch_all_symbol,
1229                                Context::REGEXP_MATCH_ALL_FUNCTION_INDEX},
1230       if_regexp_call, if_generic_call);
1231 
1232   // 3. Let S be ? ToString(O).
1233   TNode<String> s = ToString_Inline(context, receiver);
1234 
1235   // 4. Let rx be ? RegExpCreate(R, "g").
1236   TNode<Object> rx = regexp_asm.RegExpCreate(context, native_context,
1237                                              maybe_regexp, StringConstant("g"));
1238 
1239   // 5. Return ? Invoke(rx, @@matchAll, « S »).
1240   TNode<Object> match_all_func =
1241       GetProperty(context, rx, isolate()->factory()->match_all_symbol());
1242   Return(Call(context, match_all_func, rx, s));
1243 }
1244 
StringToArray(TNode<NativeContext> context,TNode<String> subject_string,TNode<Smi> subject_length,TNode<Number> limit_number)1245 TNode<JSArray> StringBuiltinsAssembler::StringToArray(
1246     TNode<NativeContext> context, TNode<String> subject_string,
1247     TNode<Smi> subject_length, TNode<Number> limit_number) {
1248   CSA_DCHECK(this, SmiGreaterThan(subject_length, SmiConstant(0)));
1249 
1250   Label done(this), call_runtime(this, Label::kDeferred),
1251       fill_thehole_and_call_runtime(this, Label::kDeferred);
1252   TVARIABLE(JSArray, result_array);
1253 
1254   TNode<Uint16T> instance_type = LoadInstanceType(subject_string);
1255   GotoIfNot(IsOneByteStringInstanceType(instance_type), &call_runtime);
1256 
1257   // Try to use cached one byte characters.
1258   {
1259     TNode<Smi> length_smi =
1260         Select<Smi>(TaggedIsSmi(limit_number),
1261                     [=] { return SmiMin(CAST(limit_number), subject_length); },
1262                     [=] { return subject_length; });
1263     TNode<IntPtrT> length = SmiToIntPtr(length_smi);
1264 
1265     ToDirectStringAssembler to_direct(state(), subject_string);
1266     to_direct.TryToDirect(&call_runtime);
1267 
1268     // The extracted direct string may be two-byte even though the wrapping
1269     // string is one-byte.
1270     GotoIfNot(IsOneByteStringInstanceType(to_direct.instance_type()),
1271               &call_runtime);
1272 
1273     TNode<FixedArray> elements = CAST(AllocateFixedArray(
1274         PACKED_ELEMENTS, length, AllocationFlag::kAllowLargeObjectAllocation));
1275     // Don't allocate anything while {string_data} is live!
1276     TNode<RawPtrT> string_data =
1277         to_direct.PointerToData(&fill_thehole_and_call_runtime);
1278     TNode<IntPtrT> string_data_offset = to_direct.offset();
1279     TNode<FixedArray> cache = SingleCharacterStringCacheConstant();
1280 
1281     BuildFastLoop<IntPtrT>(
1282         IntPtrConstant(0), length,
1283         [&](TNode<IntPtrT> index) {
1284           // TODO(jkummerow): Implement a CSA version of
1285           // DisallowGarbageCollection and use that to guard
1286           // ToDirectStringAssembler.PointerToData().
1287           CSA_DCHECK(this, WordEqual(to_direct.PointerToData(&call_runtime),
1288                                      string_data));
1289           TNode<Int32T> char_code =
1290               UncheckedCast<Int32T>(Load(MachineType::Uint8(), string_data,
1291                                          IntPtrAdd(index, string_data_offset)));
1292           TNode<UintPtrT> code_index = ChangeUint32ToWord(char_code);
1293           TNode<Object> entry = LoadFixedArrayElement(cache, code_index);
1294 
1295           // If we cannot find a char in the cache, fill the hole for the fixed
1296           // array, and call runtime.
1297           GotoIf(IsUndefined(entry), &fill_thehole_and_call_runtime);
1298 
1299           StoreFixedArrayElement(elements, index, entry);
1300         },
1301         1, IndexAdvanceMode::kPost);
1302 
1303     TNode<Map> array_map = LoadJSArrayElementsMap(PACKED_ELEMENTS, context);
1304     result_array = AllocateJSArray(array_map, elements, length_smi);
1305     Goto(&done);
1306 
1307     BIND(&fill_thehole_and_call_runtime);
1308     {
1309       FillFixedArrayWithValue(PACKED_ELEMENTS, elements, IntPtrConstant(0),
1310                               length, RootIndex::kTheHoleValue);
1311       Goto(&call_runtime);
1312     }
1313   }
1314 
1315   BIND(&call_runtime);
1316   {
1317     result_array = CAST(CallRuntime(Runtime::kStringToArray, context,
1318                                     subject_string, limit_number));
1319     Goto(&done);
1320   }
1321 
1322   BIND(&done);
1323   return result_array.value();
1324 }
1325 
1326 // ES6 section 21.1.3.19 String.prototype.split ( separator, limit )
TF_BUILTIN(StringPrototypeSplit,StringBuiltinsAssembler)1327 TF_BUILTIN(StringPrototypeSplit, StringBuiltinsAssembler) {
1328   const int kSeparatorArg = 0;
1329   const int kLimitArg = 1;
1330 
1331   const TNode<IntPtrT> argc = ChangeInt32ToIntPtr(
1332       UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount));
1333   CodeStubArguments args(this, argc);
1334 
1335   TNode<Object> receiver = args.GetReceiver();
1336   const TNode<Object> separator = args.GetOptionalArgumentValue(kSeparatorArg);
1337   const TNode<Object> limit = args.GetOptionalArgumentValue(kLimitArg);
1338   auto context = Parameter<NativeContext>(Descriptor::kContext);
1339 
1340   TNode<Smi> smi_zero = SmiConstant(0);
1341 
1342   RequireObjectCoercible(context, receiver, "String.prototype.split");
1343 
1344   // Redirect to splitter method if {separator[@@split]} is not undefined.
1345 
1346   MaybeCallFunctionAtSymbol(
1347       context, separator, receiver, isolate()->factory()->split_symbol(),
1348       DescriptorIndexNameValue{JSRegExp::kSymbolSplitFunctionDescriptorIndex,
1349                                RootIndex::ksplit_symbol,
1350                                Context::REGEXP_SPLIT_FUNCTION_INDEX},
1351       [&]() {
1352         args.PopAndReturn(CallBuiltin(Builtin::kRegExpSplit, context, separator,
1353                                       receiver, limit));
1354       },
1355       [&](TNode<Object> fn) {
1356         args.PopAndReturn(Call(context, fn, separator, receiver, limit));
1357       });
1358 
1359   // String and integer conversions.
1360 
1361   TNode<String> subject_string = ToString_Inline(context, receiver);
1362   TNode<Number> limit_number = Select<Number>(
1363       IsUndefined(limit), [=] { return NumberConstant(kMaxUInt32); },
1364       [=] { return ToUint32(context, limit); });
1365   const TNode<String> separator_string = ToString_Inline(context, separator);
1366 
1367   Label return_empty_array(this);
1368 
1369   // Shortcut for {limit} == 0.
1370   GotoIf(TaggedEqual(limit_number, smi_zero), &return_empty_array);
1371 
1372   // ECMA-262 says that if {separator} is undefined, the result should
1373   // be an array of size 1 containing the entire string.
1374   {
1375     Label next(this);
1376     GotoIfNot(IsUndefined(separator), &next);
1377 
1378     const ElementsKind kind = PACKED_ELEMENTS;
1379     const TNode<NativeContext> native_context = LoadNativeContext(context);
1380     TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1381 
1382     TNode<Smi> length = SmiConstant(1);
1383     TNode<IntPtrT> capacity = IntPtrConstant(1);
1384     TNode<JSArray> result = AllocateJSArray(kind, array_map, capacity, length);
1385 
1386     TNode<FixedArray> fixed_array = CAST(LoadElements(result));
1387     StoreFixedArrayElement(fixed_array, 0, subject_string);
1388 
1389     args.PopAndReturn(result);
1390 
1391     BIND(&next);
1392   }
1393 
1394   // If the separator string is empty then return the elements in the subject.
1395   {
1396     Label next(this);
1397     GotoIfNot(SmiEqual(LoadStringLengthAsSmi(separator_string), smi_zero),
1398               &next);
1399 
1400     TNode<Smi> subject_length = LoadStringLengthAsSmi(subject_string);
1401     GotoIf(SmiEqual(subject_length, smi_zero), &return_empty_array);
1402 
1403     args.PopAndReturn(
1404         StringToArray(context, subject_string, subject_length, limit_number));
1405 
1406     BIND(&next);
1407   }
1408 
1409   const TNode<Object> result =
1410       CallRuntime(Runtime::kStringSplit, context, subject_string,
1411                   separator_string, limit_number);
1412   args.PopAndReturn(result);
1413 
1414   BIND(&return_empty_array);
1415   {
1416     const ElementsKind kind = PACKED_ELEMENTS;
1417     const TNode<NativeContext> native_context = LoadNativeContext(context);
1418     TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1419 
1420     TNode<Smi> length = smi_zero;
1421     TNode<IntPtrT> capacity = IntPtrConstant(0);
1422     TNode<JSArray> result_array =
1423         AllocateJSArray(kind, array_map, capacity, length);
1424 
1425     args.PopAndReturn(result_array);
1426   }
1427 }
1428 
// Builtin entry point for taking a substring: reads the string and the
// untagged {from}/{to} positions from the call descriptor and returns the
// result of SubString().
TF_BUILTIN(StringSubstring, StringBuiltinsAssembler) {
  const auto subject = Parameter<String>(Descriptor::kString);
  const auto start = UncheckedParameter<IntPtrT>(Descriptor::kFrom);
  const auto finish = UncheckedParameter<IntPtrT>(Descriptor::kTo);

  const TNode<String> piece = SubString(subject, start, finish);
  Return(piece);
}
1436 
1437 
1438 // Return the |word32| codepoint at {index}. Supports SeqStrings and
1439 // ExternalStrings.
1440 // TODO(v8:9880): Use UintPtrT here.
LoadSurrogatePairAt(TNode<String> string,TNode<IntPtrT> length,TNode<IntPtrT> index,UnicodeEncoding encoding)1441 TNode<Int32T> StringBuiltinsAssembler::LoadSurrogatePairAt(
1442     TNode<String> string, TNode<IntPtrT> length, TNode<IntPtrT> index,
1443     UnicodeEncoding encoding) {
1444   Label handle_surrogate_pair(this), return_result(this);
1445   TVARIABLE(Int32T, var_result);
1446   TVARIABLE(Int32T, var_trail);
1447   var_result = StringCharCodeAt(string, Unsigned(index));
1448   var_trail = Int32Constant(0);
1449 
1450   GotoIf(Word32NotEqual(Word32And(var_result.value(), Int32Constant(0xFC00)),
1451                         Int32Constant(0xD800)),
1452          &return_result);
1453   TNode<IntPtrT> next_index = IntPtrAdd(index, IntPtrConstant(1));
1454 
1455   GotoIfNot(IntPtrLessThan(next_index, length), &return_result);
1456   var_trail = StringCharCodeAt(string, Unsigned(next_index));
1457   Branch(Word32Equal(Word32And(var_trail.value(), Int32Constant(0xFC00)),
1458                      Int32Constant(0xDC00)),
1459          &handle_surrogate_pair, &return_result);
1460 
1461   BIND(&handle_surrogate_pair);
1462   {
1463     TNode<Int32T> lead = var_result.value();
1464     TNode<Int32T> trail = var_trail.value();
1465 
1466     // Check that this path is only taken if a surrogate pair is found
1467     CSA_SLOW_DCHECK(this,
1468                     Uint32GreaterThanOrEqual(lead, Int32Constant(0xD800)));
1469     CSA_SLOW_DCHECK(this, Uint32LessThan(lead, Int32Constant(0xDC00)));
1470     CSA_SLOW_DCHECK(this,
1471                     Uint32GreaterThanOrEqual(trail, Int32Constant(0xDC00)));
1472     CSA_SLOW_DCHECK(this, Uint32LessThan(trail, Int32Constant(0xE000)));
1473 
1474     switch (encoding) {
1475       case UnicodeEncoding::UTF16:
1476         var_result = Word32Or(
1477 // Need to swap the order for big-endian platforms
1478 #if V8_TARGET_BIG_ENDIAN
1479             Word32Shl(lead, Int32Constant(16)), trail);
1480 #else
1481             Word32Shl(trail, Int32Constant(16)), lead);
1482 #endif
1483         break;
1484 
1485       case UnicodeEncoding::UTF32: {
1486         // Convert UTF16 surrogate pair into |word32| code point, encoded as
1487         // UTF32.
1488         TNode<Int32T> surrogate_offset =
1489             Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);
1490 
1491         // (lead << 10) + trail + SURROGATE_OFFSET
1492         var_result = Int32Add(Word32Shl(lead, Int32Constant(10)),
1493                               Int32Add(trail, surrogate_offset));
1494         break;
1495       }
1496     }
1497     Goto(&return_result);
1498   }
1499 
1500   BIND(&return_result);
1501   return var_result.value();
1502 }
1503 
BranchIfStringPrimitiveWithNoCustomIteration(TNode<Object> object,TNode<Context> context,Label * if_true,Label * if_false)1504 void StringBuiltinsAssembler::BranchIfStringPrimitiveWithNoCustomIteration(
1505     TNode<Object> object, TNode<Context> context, Label* if_true,
1506     Label* if_false) {
1507   GotoIf(TaggedIsSmi(object), if_false);
1508   GotoIfNot(IsString(CAST(object)), if_false);
1509 
1510   // Check that the String iterator hasn't been modified in a way that would
1511   // affect iteration.
1512   TNode<PropertyCell> protector_cell = StringIteratorProtectorConstant();
1513   DCHECK(isolate()->heap()->string_iterator_protector().IsPropertyCell());
1514   Branch(
1515       TaggedEqual(LoadObjectField(protector_cell, PropertyCell::kValueOffset),
1516                   SmiConstant(Protectors::kProtectorValid)),
1517       if_true, if_false);
1518 }
1519 
1520 // Instantiate template due to shared library requirements.
1521 template V8_EXPORT_PRIVATE void StringBuiltinsAssembler::CopyStringCharacters(
1522     TNode<String> from_string, TNode<String> to_string,
1523     TNode<IntPtrT> from_index, TNode<IntPtrT> to_index,
1524     TNode<IntPtrT> character_count, String::Encoding from_encoding,
1525     String::Encoding to_encoding);
1526 
1527 template V8_EXPORT_PRIVATE void StringBuiltinsAssembler::CopyStringCharacters(
1528     TNode<RawPtrT> from_string, TNode<String> to_string,
1529     TNode<IntPtrT> from_index, TNode<IntPtrT> to_index,
1530     TNode<IntPtrT> character_count, String::Encoding from_encoding,
1531     String::Encoding to_encoding);
1532 
1533 template <typename T>
CopyStringCharacters(TNode<T> from_string,TNode<String> to_string,TNode<IntPtrT> from_index,TNode<IntPtrT> to_index,TNode<IntPtrT> character_count,String::Encoding from_encoding,String::Encoding to_encoding)1534 void StringBuiltinsAssembler::CopyStringCharacters(
1535     TNode<T> from_string, TNode<String> to_string, TNode<IntPtrT> from_index,
1536     TNode<IntPtrT> to_index, TNode<IntPtrT> character_count,
1537     String::Encoding from_encoding, String::Encoding to_encoding) {
1538   // from_string could be either a String or a RawPtrT in the case we pass in
1539   // faked sequential strings when handling external subject strings.
1540   bool from_one_byte = from_encoding == String::ONE_BYTE_ENCODING;
1541   bool to_one_byte = to_encoding == String::ONE_BYTE_ENCODING;
1542   DCHECK_IMPLIES(to_one_byte, from_one_byte);
1543   Comment("CopyStringCharacters ",
1544           from_one_byte ? "ONE_BYTE_ENCODING" : "TWO_BYTE_ENCODING", " -> ",
1545           to_one_byte ? "ONE_BYTE_ENCODING" : "TWO_BYTE_ENCODING");
1546 
1547   ElementsKind from_kind = from_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;
1548   ElementsKind to_kind = to_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;
1549   STATIC_ASSERT(SeqOneByteString::kHeaderSize == SeqTwoByteString::kHeaderSize);
1550   int header_size = SeqOneByteString::kHeaderSize - kHeapObjectTag;
1551   TNode<IntPtrT> from_offset =
1552       ElementOffsetFromIndex(from_index, from_kind, header_size);
1553   TNode<IntPtrT> to_offset =
1554       ElementOffsetFromIndex(to_index, to_kind, header_size);
1555   TNode<IntPtrT> byte_count =
1556       ElementOffsetFromIndex(character_count, from_kind);
1557   TNode<IntPtrT> limit_offset = IntPtrAdd(from_offset, byte_count);
1558 
1559   // Prepare the fast loop
1560   MachineType type =
1561       from_one_byte ? MachineType::Uint8() : MachineType::Uint16();
1562   MachineRepresentation rep = to_one_byte ? MachineRepresentation::kWord8
1563                                           : MachineRepresentation::kWord16;
1564   int from_increment = 1 << ElementsKindToShiftSize(from_kind);
1565   int to_increment = 1 << ElementsKindToShiftSize(to_kind);
1566 
1567   TVARIABLE(IntPtrT, current_to_offset, to_offset);
1568   VariableList vars({&current_to_offset}, zone());
1569   int to_index_constant = 0, from_index_constant = 0;
1570   bool index_same = (from_encoding == to_encoding) &&
1571                     (from_index == to_index ||
1572                      (TryToInt32Constant(from_index, &from_index_constant) &&
1573                       TryToInt32Constant(to_index, &to_index_constant) &&
1574                       from_index_constant == to_index_constant));
1575   BuildFastLoop<IntPtrT>(
1576       vars, from_offset, limit_offset,
1577       [&](TNode<IntPtrT> offset) {
1578         StoreNoWriteBarrier(rep, to_string,
1579                             index_same ? offset : current_to_offset.value(),
1580                             Load(type, from_string, offset));
1581         if (!index_same) {
1582           Increment(&current_to_offset, to_increment);
1583         }
1584       },
1585       from_increment, IndexAdvanceMode::kPost);
1586 }
1587 
1588 // A wrapper around CopyStringCharacters which determines the correct string
1589 // encoding, allocates a corresponding sequential string, and then copies the
1590 // given character range using CopyStringCharacters.
1591 // |from_string| must be a sequential string.
1592 // 0 <= |from_index| <= |from_index| + |character_count| < from_string.length.
1593 template <typename T>
AllocAndCopyStringCharacters(TNode<T> from,TNode<Int32T> from_instance_type,TNode<IntPtrT> from_index,TNode<IntPtrT> character_count)1594 TNode<String> StringBuiltinsAssembler::AllocAndCopyStringCharacters(
1595     TNode<T> from, TNode<Int32T> from_instance_type, TNode<IntPtrT> from_index,
1596     TNode<IntPtrT> character_count) {
1597   Label end(this), one_byte_sequential(this), two_byte_sequential(this);
1598   TVARIABLE(String, var_result);
1599 
1600   Branch(IsOneByteStringInstanceType(from_instance_type), &one_byte_sequential,
1601          &two_byte_sequential);
1602 
1603   // The subject string is a sequential one-byte string.
1604   BIND(&one_byte_sequential);
1605   {
1606     TNode<String> result = AllocateSeqOneByteString(
1607         Unsigned(TruncateIntPtrToInt32(character_count)));
1608     CopyStringCharacters<T>(from, result, from_index, IntPtrConstant(0),
1609                             character_count, String::ONE_BYTE_ENCODING,
1610                             String::ONE_BYTE_ENCODING);
1611     var_result = result;
1612     Goto(&end);
1613   }
1614 
1615   // The subject string is a sequential two-byte string.
1616   BIND(&two_byte_sequential);
1617   {
1618     TNode<String> result = AllocateSeqTwoByteString(
1619         Unsigned(TruncateIntPtrToInt32(character_count)));
1620     CopyStringCharacters<T>(from, result, from_index, IntPtrConstant(0),
1621                             character_count, String::TWO_BYTE_ENCODING,
1622                             String::TWO_BYTE_ENCODING);
1623     var_result = result;
1624     Goto(&end);
1625   }
1626 
1627   BIND(&end);
1628   return var_result.value();
1629 }
1630 
1631 // TODO(v8:9880): Use UintPtrT here.
SubString(TNode<String> string,TNode<IntPtrT> from,TNode<IntPtrT> to)1632 TNode<String> StringBuiltinsAssembler::SubString(TNode<String> string,
1633                                                  TNode<IntPtrT> from,
1634                                                  TNode<IntPtrT> to) {
1635   TVARIABLE(String, var_result);
1636   ToDirectStringAssembler to_direct(state(), string);
1637   Label end(this), runtime(this);
1638 
1639   const TNode<IntPtrT> substr_length = IntPtrSub(to, from);
1640   const TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);
1641 
1642   // Begin dispatching based on substring length.
1643 
1644   Label original_string_or_invalid_length(this);
1645   GotoIf(UintPtrGreaterThanOrEqual(substr_length, string_length),
1646          &original_string_or_invalid_length);
1647 
1648   // A real substring (substr_length < string_length).
1649   Label empty(this);
1650   GotoIf(IntPtrEqual(substr_length, IntPtrConstant(0)), &empty);
1651 
1652   Label single_char(this);
1653   GotoIf(IntPtrEqual(substr_length, IntPtrConstant(1)), &single_char);
1654 
1655   // Deal with different string types: update the index if necessary
1656   // and extract the underlying string.
1657 
1658   TNode<String> direct_string = to_direct.TryToDirect(&runtime);
1659   TNode<IntPtrT> offset = IntPtrAdd(from, to_direct.offset());
1660   const TNode<Int32T> instance_type = to_direct.instance_type();
1661 
1662   // The subject string can only be external or sequential string of either
1663   // encoding at this point.
1664   Label external_string(this);
1665   {
1666     if (FLAG_string_slices) {
1667       Label next(this);
1668 
1669       // Short slice.  Copy instead of slicing.
1670       GotoIf(IntPtrLessThan(substr_length,
1671                             IntPtrConstant(SlicedString::kMinLength)),
1672              &next);
1673 
1674       // Allocate new sliced string.
1675 
1676       Counters* counters = isolate()->counters();
1677       IncrementCounter(counters->sub_string_native(), 1);
1678 
1679       Label one_byte_slice(this), two_byte_slice(this);
1680       Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
1681              &one_byte_slice, &two_byte_slice);
1682 
1683       BIND(&one_byte_slice);
1684       {
1685         var_result = AllocateSlicedOneByteString(
1686             Unsigned(TruncateIntPtrToInt32(substr_length)), direct_string,
1687             SmiTag(offset));
1688         Goto(&end);
1689       }
1690 
1691       BIND(&two_byte_slice);
1692       {
1693         var_result = AllocateSlicedTwoByteString(
1694             Unsigned(TruncateIntPtrToInt32(substr_length)), direct_string,
1695             SmiTag(offset));
1696         Goto(&end);
1697       }
1698 
1699       BIND(&next);
1700     }
1701 
1702     // The subject string can only be external or sequential string of either
1703     // encoding at this point.
1704     GotoIf(to_direct.is_external(), &external_string);
1705 
1706     var_result = AllocAndCopyStringCharacters(direct_string, instance_type,
1707                                               offset, substr_length);
1708 
1709     Counters* counters = isolate()->counters();
1710     IncrementCounter(counters->sub_string_native(), 1);
1711 
1712     Goto(&end);
1713   }
1714 
1715   // Handle external string.
1716   BIND(&external_string);
1717   {
1718     const TNode<RawPtrT> fake_sequential_string =
1719         to_direct.PointerToString(&runtime);
1720 
1721     var_result = AllocAndCopyStringCharacters(
1722         fake_sequential_string, instance_type, offset, substr_length);
1723 
1724     Counters* counters = isolate()->counters();
1725     IncrementCounter(counters->sub_string_native(), 1);
1726 
1727     Goto(&end);
1728   }
1729 
1730   BIND(&empty);
1731   {
1732     var_result = EmptyStringConstant();
1733     Goto(&end);
1734   }
1735 
1736   // Substrings of length 1 are generated through CharCodeAt and FromCharCode.
1737   BIND(&single_char);
1738   {
1739     TNode<Int32T> char_code = StringCharCodeAt(string, Unsigned(from));
1740     var_result = StringFromSingleCharCode(char_code);
1741     Goto(&end);
1742   }
1743 
1744   BIND(&original_string_or_invalid_length);
1745   {
1746     CSA_DCHECK(this, IntPtrEqual(substr_length, string_length));
1747 
1748     // Equal length - check if {from, to} == {0, str.length}.
1749     GotoIf(UintPtrGreaterThan(from, IntPtrConstant(0)), &runtime);
1750 
1751     // Return the original string (substr_length == string_length).
1752 
1753     Counters* counters = isolate()->counters();
1754     IncrementCounter(counters->sub_string_native(), 1);
1755 
1756     var_result = string;
1757     Goto(&end);
1758   }
1759 
1760   // Fall back to a runtime call.
1761   BIND(&runtime);
1762   {
1763     var_result =
1764         CAST(CallRuntime(Runtime::kStringSubstring, NoContextConstant(), string,
1765                          SmiTag(from), SmiTag(to)));
1766     Goto(&end);
1767   }
1768 
1769   BIND(&end);
1770   return var_result.value();
1771 }
1772 
1773 }  // namespace internal
1774 }  // namespace v8
1775