• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/builtins/builtins-regexp.h"
6 #include "src/builtins/builtins-utils.h"
7 #include "src/builtins/builtins.h"
8 #include "src/code-factory.h"
9 #include "src/code-stub-assembler.h"
10 #include "src/conversions.h"
11 #include "src/counters.h"
12 #include "src/objects-inl.h"
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-case.h"
15 #include "src/unicode-inl.h"
16 #include "src/unicode.h"
17 
18 namespace v8 {
19 namespace internal {
20 
21 typedef CodeStubAssembler::ResultMode ResultMode;
22 typedef CodeStubAssembler::RelationalComparisonMode RelationalComparisonMode;
23 
24 class StringBuiltinsAssembler : public CodeStubAssembler {
25  public:
StringBuiltinsAssembler(compiler::CodeAssemblerState * state)26   explicit StringBuiltinsAssembler(compiler::CodeAssemblerState* state)
27       : CodeStubAssembler(state) {}
28 
29  protected:
DirectStringData(Node * string,Node * string_instance_type)30   Node* DirectStringData(Node* string, Node* string_instance_type) {
31     // Compute the effective offset of the first character.
32     Variable var_data(this, MachineType::PointerRepresentation());
33     Label if_sequential(this), if_external(this), if_join(this);
34     Branch(Word32Equal(Word32And(string_instance_type,
35                                  Int32Constant(kStringRepresentationMask)),
36                        Int32Constant(kSeqStringTag)),
37            &if_sequential, &if_external);
38 
39     Bind(&if_sequential);
40     {
41       var_data.Bind(IntPtrAdd(
42           IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag),
43           BitcastTaggedToWord(string)));
44       Goto(&if_join);
45     }
46 
47     Bind(&if_external);
48     {
49       // This is only valid for ExternalStrings where the resource data
50       // pointer is cached (i.e. no short external strings).
51       CSA_ASSERT(this, Word32NotEqual(
52                            Word32And(string_instance_type,
53                                      Int32Constant(kShortExternalStringMask)),
54                            Int32Constant(kShortExternalStringTag)));
55       var_data.Bind(LoadObjectField(string, ExternalString::kResourceDataOffset,
56                                     MachineType::Pointer()));
57       Goto(&if_join);
58     }
59 
60     Bind(&if_join);
61     return var_data.value();
62   }
63 
LoadOneByteChar(Node * string,Node * index)64   Node* LoadOneByteChar(Node* string, Node* index) {
65     return Load(MachineType::Uint8(), string, OneByteCharOffset(index));
66   }
67 
OneByteCharAddress(Node * string,Node * index)68   Node* OneByteCharAddress(Node* string, Node* index) {
69     Node* offset = OneByteCharOffset(index);
70     return IntPtrAdd(string, offset);
71   }
72 
OneByteCharOffset(Node * index)73   Node* OneByteCharOffset(Node* index) {
74     return CharOffset(String::ONE_BYTE_ENCODING, index);
75   }
76 
CharOffset(String::Encoding encoding,Node * index)77   Node* CharOffset(String::Encoding encoding, Node* index) {
78     const int header = SeqOneByteString::kHeaderSize - kHeapObjectTag;
79     Node* offset = index;
80     if (encoding == String::TWO_BYTE_ENCODING) {
81       offset = IntPtrAdd(offset, offset);
82     }
83     offset = IntPtrAdd(offset, IntPtrConstant(header));
84     return offset;
85   }
86 
DispatchOnStringInstanceType(Node * const instance_type,Label * if_onebyte_sequential,Label * if_onebyte_external,Label * if_otherwise)87   void DispatchOnStringInstanceType(Node* const instance_type,
88                                     Label* if_onebyte_sequential,
89                                     Label* if_onebyte_external,
90                                     Label* if_otherwise) {
91     const int kMask = kStringRepresentationMask | kStringEncodingMask;
92     Node* const encoding_and_representation =
93         Word32And(instance_type, Int32Constant(kMask));
94 
95     int32_t values[] = {
96         kOneByteStringTag | kSeqStringTag,
97         kOneByteStringTag | kExternalStringTag,
98     };
99     Label* labels[] = {
100         if_onebyte_sequential, if_onebyte_external,
101     };
102     STATIC_ASSERT(arraysize(values) == arraysize(labels));
103 
104     Switch(encoding_and_representation, if_otherwise, values, labels,
105            arraysize(values));
106   }
107 
108   void GenerateStringEqual(ResultMode mode);
109   void GenerateStringRelationalComparison(RelationalComparisonMode mode);
110 
111   Node* ToSmiBetweenZeroAnd(Node* context, Node* value, Node* limit);
112 
113   Node* LoadSurrogatePairAt(Node* string, Node* length, Node* index,
114                             UnicodeEncoding encoding);
115 
116   void StringIndexOf(Node* receiver, Node* instance_type, Node* search_string,
117                      Node* search_string_instance_type, Node* position,
118                      std::function<void(Node*)> f_return);
119 
120   Node* IsNullOrUndefined(Node* const value);
121   void RequireObjectCoercible(Node* const context, Node* const value,
122                               const char* method_name);
123 
SmiIsNegative(Node * const value)124   Node* SmiIsNegative(Node* const value) {
125     return SmiLessThan(value, SmiConstant(0));
126   }
127 
128   // Implements boilerplate logic for {match, split, replace, search} of the
129   // form:
130   //
131   //  if (!IS_NULL_OR_UNDEFINED(object)) {
132   //    var maybe_function = object[symbol];
133   //    if (!IS_UNDEFINED(maybe_function)) {
134   //      return %_Call(maybe_function, ...);
135   //    }
136   //  }
137   //
138   // Contains fast paths for Smi and RegExp objects.
139   typedef std::function<Node*()> NodeFunction0;
140   typedef std::function<Node*(Node* fn)> NodeFunction1;
141   void MaybeCallFunctionAtSymbol(Node* const context, Node* const object,
142                                  Node* const maybe_string,
143                                  Handle<Symbol> symbol,
144                                  const NodeFunction0& regexp_call,
145                                  const NodeFunction1& generic_call);
146 };
147 
GenerateStringEqual(ResultMode mode)148 void StringBuiltinsAssembler::GenerateStringEqual(ResultMode mode) {
149   // Here's pseudo-code for the algorithm below in case of kDontNegateResult
150   // mode; for kNegateResult mode we properly negate the result.
151   //
152   // if (lhs == rhs) return true;
153   // if (lhs->length() != rhs->length()) return false;
154   // if (lhs->IsInternalizedString() && rhs->IsInternalizedString()) {
155   //   return false;
156   // }
157   // if (lhs->IsSeqOneByteString() && rhs->IsSeqOneByteString()) {
158   //   for (i = 0; i != lhs->length(); ++i) {
159   //     if (lhs[i] != rhs[i]) return false;
160   //   }
161   //   return true;
162   // }
163   // if (lhs and/or rhs are indirect strings) {
164   //   unwrap them and restart from the beginning;
165   // }
166   // return %StringEqual(lhs, rhs);
167 
168   Variable var_left(this, MachineRepresentation::kTagged);
169   Variable var_right(this, MachineRepresentation::kTagged);
170   var_left.Bind(Parameter(0));
171   var_right.Bind(Parameter(1));
172   Node* context = Parameter(2);
173 
174   Variable* input_vars[2] = {&var_left, &var_right};
175   Label if_equal(this), if_notequal(this), restart(this, 2, input_vars);
176   Goto(&restart);
177   Bind(&restart);
178   Node* lhs = var_left.value();
179   Node* rhs = var_right.value();
180 
181   // Fast check to see if {lhs} and {rhs} refer to the same String object.
182   GotoIf(WordEqual(lhs, rhs), &if_equal);
183 
184   // Load the length of {lhs} and {rhs}.
185   Node* lhs_length = LoadStringLength(lhs);
186   Node* rhs_length = LoadStringLength(rhs);
187 
188   // Strings with different lengths cannot be equal.
189   GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
190 
191   // Load instance types of {lhs} and {rhs}.
192   Node* lhs_instance_type = LoadInstanceType(lhs);
193   Node* rhs_instance_type = LoadInstanceType(rhs);
194 
195   // Combine the instance types into a single 16-bit value, so we can check
196   // both of them at once.
197   Node* both_instance_types = Word32Or(
198       lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
199 
200   // Check if both {lhs} and {rhs} are internalized. Since we already know
201   // that they're not the same object, they're not equal in that case.
202   int const kBothInternalizedMask =
203       kIsNotInternalizedMask | (kIsNotInternalizedMask << 8);
204   int const kBothInternalizedTag = kInternalizedTag | (kInternalizedTag << 8);
205   GotoIf(Word32Equal(Word32And(both_instance_types,
206                                Int32Constant(kBothInternalizedMask)),
207                      Int32Constant(kBothInternalizedTag)),
208          &if_notequal);
209 
210   // Check that both {lhs} and {rhs} are flat one-byte strings, and that
211   // in case of ExternalStrings the data pointer is cached..
212   STATIC_ASSERT(kShortExternalStringTag != 0);
213   int const kBothDirectOneByteStringMask =
214       kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask |
215       ((kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask)
216        << 8);
217   int const kBothDirectOneByteStringTag =
218       kOneByteStringTag | (kOneByteStringTag << 8);
219   Label if_bothdirectonebytestrings(this), if_notbothdirectonebytestrings(this);
220   Branch(Word32Equal(Word32And(both_instance_types,
221                                Int32Constant(kBothDirectOneByteStringMask)),
222                      Int32Constant(kBothDirectOneByteStringTag)),
223          &if_bothdirectonebytestrings, &if_notbothdirectonebytestrings);
224 
225   Bind(&if_bothdirectonebytestrings);
226   {
227     // Compute the effective offset of the first character.
228     Node* lhs_data = DirectStringData(lhs, lhs_instance_type);
229     Node* rhs_data = DirectStringData(rhs, rhs_instance_type);
230 
231     // Compute the first offset after the string from the length.
232     Node* length = SmiUntag(lhs_length);
233 
234     // Loop over the {lhs} and {rhs} strings to see if they are equal.
235     Variable var_offset(this, MachineType::PointerRepresentation());
236     Label loop(this, &var_offset);
237     var_offset.Bind(IntPtrConstant(0));
238     Goto(&loop);
239     Bind(&loop);
240     {
241       // If {offset} equals {end}, no difference was found, so the
242       // strings are equal.
243       Node* offset = var_offset.value();
244       GotoIf(WordEqual(offset, length), &if_equal);
245 
246       // Load the next characters from {lhs} and {rhs}.
247       Node* lhs_value = Load(MachineType::Uint8(), lhs_data, offset);
248       Node* rhs_value = Load(MachineType::Uint8(), rhs_data, offset);
249 
250       // Check if the characters match.
251       GotoIf(Word32NotEqual(lhs_value, rhs_value), &if_notequal);
252 
253       // Advance to next character.
254       var_offset.Bind(IntPtrAdd(offset, IntPtrConstant(1)));
255       Goto(&loop);
256     }
257   }
258 
259   Bind(&if_notbothdirectonebytestrings);
260   {
261     // Try to unwrap indirect strings, restart the above attempt on success.
262     MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
263                               rhs_instance_type, &restart);
264     // TODO(bmeurer): Add support for two byte string equality checks.
265 
266     Runtime::FunctionId function_id = (mode == ResultMode::kDontNegateResult)
267                                           ? Runtime::kStringEqual
268                                           : Runtime::kStringNotEqual;
269     TailCallRuntime(function_id, context, lhs, rhs);
270   }
271 
272   Bind(&if_equal);
273   Return(BooleanConstant(mode == ResultMode::kDontNegateResult));
274 
275   Bind(&if_notequal);
276   Return(BooleanConstant(mode == ResultMode::kNegateResult));
277 }
278 
GenerateStringRelationalComparison(RelationalComparisonMode mode)279 void StringBuiltinsAssembler::GenerateStringRelationalComparison(
280     RelationalComparisonMode mode) {
281   Variable var_left(this, MachineRepresentation::kTagged);
282   Variable var_right(this, MachineRepresentation::kTagged);
283   var_left.Bind(Parameter(0));
284   var_right.Bind(Parameter(1));
285   Node* context = Parameter(2);
286 
287   Variable* input_vars[2] = {&var_left, &var_right};
288   Label if_less(this), if_equal(this), if_greater(this);
289   Label restart(this, 2, input_vars);
290   Goto(&restart);
291   Bind(&restart);
292 
293   Node* lhs = var_left.value();
294   Node* rhs = var_right.value();
295   // Fast check to see if {lhs} and {rhs} refer to the same String object.
296   GotoIf(WordEqual(lhs, rhs), &if_equal);
297 
298   // Load instance types of {lhs} and {rhs}.
299   Node* lhs_instance_type = LoadInstanceType(lhs);
300   Node* rhs_instance_type = LoadInstanceType(rhs);
301 
302   // Combine the instance types into a single 16-bit value, so we can check
303   // both of them at once.
304   Node* both_instance_types = Word32Or(
305       lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
306 
307   // Check that both {lhs} and {rhs} are flat one-byte strings.
308   int const kBothSeqOneByteStringMask =
309       kStringEncodingMask | kStringRepresentationMask |
310       ((kStringEncodingMask | kStringRepresentationMask) << 8);
311   int const kBothSeqOneByteStringTag =
312       kOneByteStringTag | kSeqStringTag |
313       ((kOneByteStringTag | kSeqStringTag) << 8);
314   Label if_bothonebyteseqstrings(this), if_notbothonebyteseqstrings(this);
315   Branch(Word32Equal(Word32And(both_instance_types,
316                                Int32Constant(kBothSeqOneByteStringMask)),
317                      Int32Constant(kBothSeqOneByteStringTag)),
318          &if_bothonebyteseqstrings, &if_notbothonebyteseqstrings);
319 
320   Bind(&if_bothonebyteseqstrings);
321   {
322     // Load the length of {lhs} and {rhs}.
323     Node* lhs_length = LoadStringLength(lhs);
324     Node* rhs_length = LoadStringLength(rhs);
325 
326     // Determine the minimum length.
327     Node* length = SmiMin(lhs_length, rhs_length);
328 
329     // Compute the effective offset of the first character.
330     Node* begin =
331         IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag);
332 
333     // Compute the first offset after the string from the length.
334     Node* end = IntPtrAdd(begin, SmiUntag(length));
335 
336     // Loop over the {lhs} and {rhs} strings to see if they are equal.
337     Variable var_offset(this, MachineType::PointerRepresentation());
338     Label loop(this, &var_offset);
339     var_offset.Bind(begin);
340     Goto(&loop);
341     Bind(&loop);
342     {
343       // Check if {offset} equals {end}.
344       Node* offset = var_offset.value();
345       Label if_done(this), if_notdone(this);
346       Branch(WordEqual(offset, end), &if_done, &if_notdone);
347 
348       Bind(&if_notdone);
349       {
350         // Load the next characters from {lhs} and {rhs}.
351         Node* lhs_value = Load(MachineType::Uint8(), lhs, offset);
352         Node* rhs_value = Load(MachineType::Uint8(), rhs, offset);
353 
354         // Check if the characters match.
355         Label if_valueissame(this), if_valueisnotsame(this);
356         Branch(Word32Equal(lhs_value, rhs_value), &if_valueissame,
357                &if_valueisnotsame);
358 
359         Bind(&if_valueissame);
360         {
361           // Advance to next character.
362           var_offset.Bind(IntPtrAdd(offset, IntPtrConstant(1)));
363         }
364         Goto(&loop);
365 
366         Bind(&if_valueisnotsame);
367         Branch(Uint32LessThan(lhs_value, rhs_value), &if_less, &if_greater);
368       }
369 
370       Bind(&if_done);
371       {
372         // All characters up to the min length are equal, decide based on
373         // string length.
374         GotoIf(SmiEqual(lhs_length, rhs_length), &if_equal);
375         BranchIfSmiLessThan(lhs_length, rhs_length, &if_less, &if_greater);
376       }
377     }
378     }
379 
380     Bind(&if_notbothonebyteseqstrings);
381     {
382       // Try to unwrap indirect strings, restart the above attempt on success.
383       MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
384                                 rhs_instance_type, &restart);
385       // TODO(bmeurer): Add support for two byte string relational comparisons.
386       switch (mode) {
387         case RelationalComparisonMode::kLessThan:
388           TailCallRuntime(Runtime::kStringLessThan, context, lhs, rhs);
389           break;
390         case RelationalComparisonMode::kLessThanOrEqual:
391           TailCallRuntime(Runtime::kStringLessThanOrEqual, context, lhs, rhs);
392           break;
393         case RelationalComparisonMode::kGreaterThan:
394           TailCallRuntime(Runtime::kStringGreaterThan, context, lhs, rhs);
395           break;
396         case RelationalComparisonMode::kGreaterThanOrEqual:
397           TailCallRuntime(Runtime::kStringGreaterThanOrEqual, context, lhs,
398                           rhs);
399           break;
400       }
401     }
402 
403     Bind(&if_less);
404     switch (mode) {
405       case RelationalComparisonMode::kLessThan:
406       case RelationalComparisonMode::kLessThanOrEqual:
407         Return(BooleanConstant(true));
408         break;
409 
410       case RelationalComparisonMode::kGreaterThan:
411       case RelationalComparisonMode::kGreaterThanOrEqual:
412         Return(BooleanConstant(false));
413         break;
414   }
415 
416   Bind(&if_equal);
417   switch (mode) {
418     case RelationalComparisonMode::kLessThan:
419     case RelationalComparisonMode::kGreaterThan:
420       Return(BooleanConstant(false));
421       break;
422 
423     case RelationalComparisonMode::kLessThanOrEqual:
424     case RelationalComparisonMode::kGreaterThanOrEqual:
425       Return(BooleanConstant(true));
426       break;
427   }
428 
429   Bind(&if_greater);
430   switch (mode) {
431     case RelationalComparisonMode::kLessThan:
432     case RelationalComparisonMode::kLessThanOrEqual:
433       Return(BooleanConstant(false));
434       break;
435 
436     case RelationalComparisonMode::kGreaterThan:
437     case RelationalComparisonMode::kGreaterThanOrEqual:
438       Return(BooleanConstant(true));
439       break;
440   }
441 }
442 
// String equality builtin: generated by GenerateStringEqual without negating
// the result.
TF_BUILTIN(StringEqual, StringBuiltinsAssembler) {
  const ResultMode mode = ResultMode::kDontNegateResult;
  GenerateStringEqual(mode);
}
446 
// String inequality builtin: generated by GenerateStringEqual with the result
// negated.
TF_BUILTIN(StringNotEqual, StringBuiltinsAssembler) {
  const ResultMode mode = ResultMode::kNegateResult;
  GenerateStringEqual(mode);
}
450 
// String "less than" builtin, generated by the shared relational comparison.
TF_BUILTIN(StringLessThan, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode = RelationalComparisonMode::kLessThan;
  GenerateStringRelationalComparison(mode);
}
454 
// String "less than or equal" builtin, generated by the shared relational
// comparison.
TF_BUILTIN(StringLessThanOrEqual, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode =
      RelationalComparisonMode::kLessThanOrEqual;
  GenerateStringRelationalComparison(mode);
}
459 
// String "greater than" builtin, generated by the shared relational
// comparison.
TF_BUILTIN(StringGreaterThan, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode = RelationalComparisonMode::kGreaterThan;
  GenerateStringRelationalComparison(mode);
}
463 
// String "greater than or equal" builtin, generated by the shared relational
// comparison.
TF_BUILTIN(StringGreaterThanOrEqual, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode =
      RelationalComparisonMode::kGreaterThanOrEqual;
  GenerateStringRelationalComparison(mode);
}
468 
// Returns the single character string for the character at parameter 1
// (the position) of parameter 0 (the string).
TF_BUILTIN(StringCharAt, CodeStubAssembler) {
  Node* const string = Parameter(0);
  Node* const index = Parameter(1);

  // Fetch the character code; {index} is passed in INTPTR_PARAMETERS mode.
  Node* const char_code = StringCharCodeAt(string, index, INTPTR_PARAMETERS);

  // Wrap that code in a one-character string and return it.
  Return(StringFromCharCode(char_code));
}
480 
// Returns, as a Smi, the character code at parameter 1 (the position) of
// parameter 0 (the string).
TF_BUILTIN(StringCharCodeAt, CodeStubAssembler) {
  Node* const string = Parameter(0);
  Node* const index = Parameter(1);

  // Fetch the character code; {index} is passed in INTPTR_PARAMETERS mode.
  Node* const char_code = StringCharCodeAt(string, index, INTPTR_PARAMETERS);

  // The result has to be tagged before it is returned.
  // TODO(turbofan): Allow builtins to return values untagged.
  Return(SmiFromWord32(char_code));
}
493 
494 // -----------------------------------------------------------------------------
495 // ES6 section 21.1 String Objects
496 
497 // ES6 section 21.1.2.1 String.fromCharCode ( ...codeUnits )
TF_BUILTIN(StringFromCharCode,CodeStubAssembler)498 TF_BUILTIN(StringFromCharCode, CodeStubAssembler) {
499   Node* argc = Parameter(BuiltinDescriptor::kArgumentsCount);
500   Node* context = Parameter(BuiltinDescriptor::kContext);
501 
502   CodeStubArguments arguments(this, ChangeInt32ToIntPtr(argc));
503   // From now on use word-size argc value.
504   argc = arguments.GetLength();
505 
506   // Check if we have exactly one argument (plus the implicit receiver), i.e.
507   // if the parent frame is not an arguments adaptor frame.
508   Label if_oneargument(this), if_notoneargument(this);
509   Branch(WordEqual(argc, IntPtrConstant(1)), &if_oneargument,
510          &if_notoneargument);
511 
512   Bind(&if_oneargument);
513   {
514     // Single argument case, perform fast single character string cache lookup
515     // for one-byte code units, or fall back to creating a single character
516     // string on the fly otherwise.
517     Node* code = arguments.AtIndex(0);
518     Node* code32 = TruncateTaggedToWord32(context, code);
519     Node* code16 = Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));
520     Node* result = StringFromCharCode(code16);
521     arguments.PopAndReturn(result);
522   }
523 
524   Node* code16 = nullptr;
525   Bind(&if_notoneargument);
526   {
527     Label two_byte(this);
528     // Assume that the resulting string contains only one-byte characters.
529     Node* one_byte_result = AllocateSeqOneByteString(context, argc);
530 
531     Variable max_index(this, MachineType::PointerRepresentation());
532     max_index.Bind(IntPtrConstant(0));
533 
534     // Iterate over the incoming arguments, converting them to 8-bit character
535     // codes. Stop if any of the conversions generates a code that doesn't fit
536     // in 8 bits.
537     CodeStubAssembler::VariableList vars({&max_index}, zone());
538     arguments.ForEach(vars, [this, context, &two_byte, &max_index, &code16,
539                              one_byte_result](Node* arg) {
540       Node* code32 = TruncateTaggedToWord32(context, arg);
541       code16 = Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));
542 
543       GotoIf(
544           Int32GreaterThan(code16, Int32Constant(String::kMaxOneByteCharCode)),
545           &two_byte);
546 
547       // The {code16} fits into the SeqOneByteString {one_byte_result}.
548       Node* offset = ElementOffsetFromIndex(
549           max_index.value(), UINT8_ELEMENTS,
550           CodeStubAssembler::INTPTR_PARAMETERS,
551           SeqOneByteString::kHeaderSize - kHeapObjectTag);
552       StoreNoWriteBarrier(MachineRepresentation::kWord8, one_byte_result,
553                           offset, code16);
554       max_index.Bind(IntPtrAdd(max_index.value(), IntPtrConstant(1)));
555     });
556     arguments.PopAndReturn(one_byte_result);
557 
558     Bind(&two_byte);
559 
560     // At least one of the characters in the string requires a 16-bit
561     // representation.  Allocate a SeqTwoByteString to hold the resulting
562     // string.
563     Node* two_byte_result = AllocateSeqTwoByteString(context, argc);
564 
565     // Copy the characters that have already been put in the 8-bit string into
566     // their corresponding positions in the new 16-bit string.
567     Node* zero = IntPtrConstant(0);
568     CopyStringCharacters(one_byte_result, two_byte_result, zero, zero,
569                          max_index.value(), String::ONE_BYTE_ENCODING,
570                          String::TWO_BYTE_ENCODING,
571                          CodeStubAssembler::INTPTR_PARAMETERS);
572 
573     // Write the character that caused the 8-bit to 16-bit fault.
574     Node* max_index_offset =
575         ElementOffsetFromIndex(max_index.value(), UINT16_ELEMENTS,
576                                CodeStubAssembler::INTPTR_PARAMETERS,
577                                SeqTwoByteString::kHeaderSize - kHeapObjectTag);
578     StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
579                         max_index_offset, code16);
580     max_index.Bind(IntPtrAdd(max_index.value(), IntPtrConstant(1)));
581 
582     // Resume copying the passed-in arguments from the same place where the
583     // 8-bit copy stopped, but this time copying over all of the characters
584     // using a 16-bit representation.
585     arguments.ForEach(
586         vars,
587         [this, context, two_byte_result, &max_index](Node* arg) {
588           Node* code32 = TruncateTaggedToWord32(context, arg);
589           Node* code16 =
590               Word32And(code32, Int32Constant(String::kMaxUtf16CodeUnit));
591 
592           Node* offset = ElementOffsetFromIndex(
593               max_index.value(), UINT16_ELEMENTS,
594               CodeStubAssembler::INTPTR_PARAMETERS,
595               SeqTwoByteString::kHeaderSize - kHeapObjectTag);
596           StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
597                               offset, code16);
598           max_index.Bind(IntPtrAdd(max_index.value(), IntPtrConstant(1)));
599         },
600         max_index.value());
601 
602     arguments.PopAndReturn(two_byte_result);
603   }
604 }
605 
606 namespace {  // for String.fromCodePoint
607 
IsValidCodePoint(Isolate * isolate,Handle<Object> value)608 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
609   if (!value->IsNumber() && !Object::ToNumber(value).ToHandle(&value)) {
610     return false;
611   }
612 
613   if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
614       value->Number()) {
615     return false;
616   }
617 
618   if (value->Number() < 0 || value->Number() > 0x10FFFF) {
619     return false;
620   }
621 
622   return true;
623 }
624 
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)625 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
626   Handle<Object> value = args.at(1 + index);
627   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value, Object::ToNumber(value), -1);
628   if (!IsValidCodePoint(isolate, value)) {
629     isolate->Throw(*isolate->factory()->NewRangeError(
630         MessageTemplate::kInvalidCodePoint, value));
631     return -1;
632   }
633   return DoubleToUint32(value->Number());
634 }
635 
636 }  // namespace
637 
638 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)639 BUILTIN(StringFromCodePoint) {
640   HandleScope scope(isolate);
641   int const length = args.length() - 1;
642   if (length == 0) return isolate->heap()->empty_string();
643   DCHECK_LT(0, length);
644 
645   // Optimistically assume that the resulting String contains only one byte
646   // characters.
647   List<uint8_t> one_byte_buffer(length);
648   uc32 code = 0;
649   int index;
650   for (index = 0; index < length; index++) {
651     code = NextCodePoint(isolate, args, index);
652     if (code < 0) {
653       return isolate->heap()->exception();
654     }
655     if (code > String::kMaxOneByteCharCode) {
656       break;
657     }
658     one_byte_buffer.Add(code);
659   }
660 
661   if (index == length) {
662     RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromOneByte(
663                                           one_byte_buffer.ToConstVector()));
664   }
665 
666   List<uc16> two_byte_buffer(length - index);
667 
668   while (true) {
669     if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
670       two_byte_buffer.Add(code);
671     } else {
672       two_byte_buffer.Add(unibrow::Utf16::LeadSurrogate(code));
673       two_byte_buffer.Add(unibrow::Utf16::TrailSurrogate(code));
674     }
675 
676     if (++index == length) {
677       break;
678     }
679     code = NextCodePoint(isolate, args, index);
680     if (code < 0) {
681       return isolate->heap()->exception();
682     }
683   }
684 
685   Handle<SeqTwoByteString> result;
686   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
687       isolate, result,
688       isolate->factory()->NewRawTwoByteString(one_byte_buffer.length() +
689                                               two_byte_buffer.length()));
690 
691   CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),
692             one_byte_buffer.length());
693   CopyChars(result->GetChars() + one_byte_buffer.length(),
694             two_byte_buffer.ToConstVector().start(), two_byte_buffer.length());
695 
696   return *result;
697 }
698 
699 // ES6 section 21.1.3.1 String.prototype.charAt ( pos )
TF_BUILTIN(StringPrototypeCharAt,CodeStubAssembler)700 TF_BUILTIN(StringPrototypeCharAt, CodeStubAssembler) {
701   Node* receiver = Parameter(0);
702   Node* position = Parameter(1);
703   Node* context = Parameter(4);
704 
705   // Check that {receiver} is coercible to Object and convert it to a String.
706   receiver = ToThisString(context, receiver, "String.prototype.charAt");
707 
708   // Convert the {position} to a Smi and check that it's in bounds of the
709   // {receiver}.
710   {
711     Label return_emptystring(this, Label::kDeferred);
712     position =
713         ToInteger(context, position, CodeStubAssembler::kTruncateMinusZero);
714     GotoIfNot(TaggedIsSmi(position), &return_emptystring);
715 
716     // Determine the actual length of the {receiver} String.
717     Node* receiver_length = LoadObjectField(receiver, String::kLengthOffset);
718 
719     // Return "" if the Smi {position} is outside the bounds of the {receiver}.
720     Label if_positioninbounds(this);
721     Branch(SmiAboveOrEqual(position, receiver_length), &return_emptystring,
722            &if_positioninbounds);
723 
724     Bind(&return_emptystring);
725     Return(EmptyStringConstant());
726 
727     Bind(&if_positioninbounds);
728   }
729 
730   // Load the character code at the {position} from the {receiver}.
731   Node* code = StringCharCodeAt(receiver, position);
732 
733   // And return the single character string with only that {code}.
734   Node* result = StringFromCharCode(code);
735   Return(result);
736 }
737 
738 // ES6 section 21.1.3.2 String.prototype.charCodeAt ( pos )
TF_BUILTIN(StringPrototypeCharCodeAt,CodeStubAssembler)739 TF_BUILTIN(StringPrototypeCharCodeAt, CodeStubAssembler) {
740   Node* receiver = Parameter(0);
741   Node* position = Parameter(1);
742   Node* context = Parameter(4);
743 
744   // Check that {receiver} is coercible to Object and convert it to a String.
745   receiver = ToThisString(context, receiver, "String.prototype.charCodeAt");
746 
747   // Convert the {position} to a Smi and check that it's in bounds of the
748   // {receiver}.
749   {
750     Label return_nan(this, Label::kDeferred);
751     position =
752         ToInteger(context, position, CodeStubAssembler::kTruncateMinusZero);
753     GotoIfNot(TaggedIsSmi(position), &return_nan);
754 
755     // Determine the actual length of the {receiver} String.
756     Node* receiver_length = LoadObjectField(receiver, String::kLengthOffset);
757 
758     // Return NaN if the Smi {position} is outside the bounds of the {receiver}.
759     Label if_positioninbounds(this);
760     Branch(SmiAboveOrEqual(position, receiver_length), &return_nan,
761            &if_positioninbounds);
762 
763     Bind(&return_nan);
764     Return(NaNConstant());
765 
766     Bind(&if_positioninbounds);
767   }
768 
769   // Load the character at the {position} from the {receiver}.
770   Node* value = StringCharCodeAt(receiver, position);
771   Node* result = SmiFromWord32(value);
772   Return(result);
773 }
774 
775 // ES6 section 21.1.3.6
776 // String.prototype.endsWith ( searchString [ , endPosition ] )
BUILTIN(StringPrototypeEndsWith)777 BUILTIN(StringPrototypeEndsWith) {
778   HandleScope handle_scope(isolate);
779   TO_THIS_STRING(str, "String.prototype.endsWith");
780 
781   // Check if the search string is a regExp and fail if it is.
782   Handle<Object> search = args.atOrUndefined(isolate, 1);
783   Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
784   if (is_reg_exp.IsNothing()) {
785     DCHECK(isolate->has_pending_exception());
786     return isolate->heap()->exception();
787   }
788   if (is_reg_exp.FromJust()) {
789     THROW_NEW_ERROR_RETURN_FAILURE(
790         isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
791                               isolate->factory()->NewStringFromStaticChars(
792                                   "String.prototype.endsWith")));
793   }
794   Handle<String> search_string;
795   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
796                                      Object::ToString(isolate, search));
797 
798   Handle<Object> position = args.atOrUndefined(isolate, 2);
799   int end;
800 
801   if (position->IsUndefined(isolate)) {
802     end = str->length();
803   } else {
804     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
805                                        Object::ToInteger(isolate, position));
806     end = str->ToValidIndex(*position);
807   }
808 
809   int start = end - search_string->length();
810   if (start < 0) return isolate->heap()->false_value();
811 
812   str = String::Flatten(str);
813   search_string = String::Flatten(search_string);
814 
815   DisallowHeapAllocation no_gc;  // ensure vectors stay valid
816   String::FlatContent str_content = str->GetFlatContent();
817   String::FlatContent search_content = search_string->GetFlatContent();
818 
819   if (str_content.IsOneByte() && search_content.IsOneByte()) {
820     Vector<const uint8_t> str_vector = str_content.ToOneByteVector();
821     Vector<const uint8_t> search_vector = search_content.ToOneByteVector();
822 
823     return isolate->heap()->ToBoolean(memcmp(str_vector.start() + start,
824                                              search_vector.start(),
825                                              search_string->length()) == 0);
826   }
827 
828   FlatStringReader str_reader(isolate, str);
829   FlatStringReader search_reader(isolate, search_string);
830 
831   for (int i = 0; i < search_string->length(); i++) {
832     if (str_reader.Get(start + i) != search_reader.Get(i)) {
833       return isolate->heap()->false_value();
834     }
835   }
836   return isolate->heap()->true_value();
837 }
838 
839 // ES6 section 21.1.3.7
840 // String.prototype.includes ( searchString [ , position ] )
BUILTIN(StringPrototypeIncludes)841 BUILTIN(StringPrototypeIncludes) {
842   HandleScope handle_scope(isolate);
843   TO_THIS_STRING(str, "String.prototype.includes");
844 
845   // Check if the search string is a regExp and fail if it is.
846   Handle<Object> search = args.atOrUndefined(isolate, 1);
847   Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
848   if (is_reg_exp.IsNothing()) {
849     DCHECK(isolate->has_pending_exception());
850     return isolate->heap()->exception();
851   }
852   if (is_reg_exp.FromJust()) {
853     THROW_NEW_ERROR_RETURN_FAILURE(
854         isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
855                               isolate->factory()->NewStringFromStaticChars(
856                                   "String.prototype.includes")));
857   }
858   Handle<String> search_string;
859   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
860                                      Object::ToString(isolate, search));
861   Handle<Object> position;
862   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
863       isolate, position,
864       Object::ToInteger(isolate, args.atOrUndefined(isolate, 2)));
865 
866   uint32_t index = str->ToValidIndex(*position);
867   int index_in_str = String::IndexOf(isolate, str, search_string, index);
868   return *isolate->factory()->ToBoolean(index_in_str != -1);
869 }
870 
StringIndexOf(Node * receiver,Node * instance_type,Node * search_string,Node * search_string_instance_type,Node * position,std::function<void (Node *)> f_return)871 void StringBuiltinsAssembler::StringIndexOf(
872     Node* receiver, Node* instance_type, Node* search_string,
873     Node* search_string_instance_type, Node* position,
874     std::function<void(Node*)> f_return) {
875   CSA_ASSERT(this, IsString(receiver));
876   CSA_ASSERT(this, IsString(search_string));
877   CSA_ASSERT(this, TaggedIsSmi(position));
878 
879   Label zero_length_needle(this),
880       call_runtime_unchecked(this, Label::kDeferred), return_minus_1(this),
881       check_search_string(this), continue_fast_path(this);
882 
883   Node* const int_zero = IntPtrConstant(0);
884   Variable var_needle_byte(this, MachineType::PointerRepresentation(),
885                            int_zero);
886   Variable var_string_addr(this, MachineType::PointerRepresentation(),
887                            int_zero);
888 
889   Node* needle_length = SmiUntag(LoadStringLength(search_string));
890   // Use faster/complex runtime fallback for long search strings.
891   GotoIf(IntPtrLessThan(IntPtrConstant(1), needle_length),
892          &call_runtime_unchecked);
893   Node* string_length = SmiUntag(LoadStringLength(receiver));
894   Node* start_position = IntPtrMax(SmiUntag(position), int_zero);
895 
896   GotoIf(IntPtrEqual(int_zero, needle_length), &zero_length_needle);
897   // Check that the needle fits in the start position.
898   GotoIfNot(IntPtrLessThanOrEqual(needle_length,
899                                   IntPtrSub(string_length, start_position)),
900             &return_minus_1);
901 
902   // Load the string address.
903   {
904     Label if_onebyte_sequential(this);
905     Label if_onebyte_external(this, Label::kDeferred);
906 
907     // Only support one-byte strings on the fast path.
908     DispatchOnStringInstanceType(instance_type, &if_onebyte_sequential,
909                                  &if_onebyte_external, &call_runtime_unchecked);
910 
911     Bind(&if_onebyte_sequential);
912     {
913       var_string_addr.Bind(
914           OneByteCharAddress(BitcastTaggedToWord(receiver), start_position));
915       Goto(&check_search_string);
916     }
917 
918     Bind(&if_onebyte_external);
919     {
920       Node* const unpacked = TryDerefExternalString(receiver, instance_type,
921                                                     &call_runtime_unchecked);
922       var_string_addr.Bind(OneByteCharAddress(unpacked, start_position));
923       Goto(&check_search_string);
924     }
925   }
926 
927   // Load the needle character.
928   Bind(&check_search_string);
929   {
930     Label if_onebyte_sequential(this);
931     Label if_onebyte_external(this, Label::kDeferred);
932 
933     DispatchOnStringInstanceType(search_string_instance_type,
934                                  &if_onebyte_sequential, &if_onebyte_external,
935                                  &call_runtime_unchecked);
936 
937     Bind(&if_onebyte_sequential);
938     {
939       var_needle_byte.Bind(
940           ChangeInt32ToIntPtr(LoadOneByteChar(search_string, int_zero)));
941       Goto(&continue_fast_path);
942     }
943 
944     Bind(&if_onebyte_external);
945     {
946       Node* const unpacked = TryDerefExternalString(
947           search_string, search_string_instance_type, &call_runtime_unchecked);
948       var_needle_byte.Bind(
949           ChangeInt32ToIntPtr(LoadOneByteChar(unpacked, int_zero)));
950       Goto(&continue_fast_path);
951     }
952   }
953 
954   Bind(&continue_fast_path);
955   {
956     Node* needle_byte = var_needle_byte.value();
957     Node* string_addr = var_string_addr.value();
958     Node* search_length = IntPtrSub(string_length, start_position);
959     // Call out to the highly optimized memchr to perform the actual byte
960     // search.
961     Node* memchr =
962         ExternalConstant(ExternalReference::libc_memchr_function(isolate()));
963     Node* result_address =
964         CallCFunction3(MachineType::Pointer(), MachineType::Pointer(),
965                        MachineType::IntPtr(), MachineType::UintPtr(), memchr,
966                        string_addr, needle_byte, search_length);
967     GotoIf(WordEqual(result_address, int_zero), &return_minus_1);
968     Node* result_index =
969         IntPtrAdd(IntPtrSub(result_address, string_addr), start_position);
970     f_return(SmiTag(result_index));
971   }
972 
973   Bind(&return_minus_1);
974   f_return(SmiConstant(-1));
975 
976   Bind(&zero_length_needle);
977   {
978     Comment("0-length search_string");
979     f_return(SmiTag(IntPtrMin(string_length, start_position)));
980   }
981 
982   Bind(&call_runtime_unchecked);
983   {
984     // Simplified version of the runtime call where the types of the arguments
985     // are already known due to type checks in this stub.
986     Comment("Call Runtime Unchecked");
987     Node* result = CallRuntime(Runtime::kStringIndexOfUnchecked, SmiConstant(0),
988                                receiver, search_string, position);
989     f_return(result);
990   }
991 }
992 
993 // ES6 String.prototype.indexOf(searchString [, position])
994 // #sec-string.prototype.indexof
995 // Unchecked helper for builtins lowering.
TF_BUILTIN(StringIndexOf,StringBuiltinsAssembler)996 TF_BUILTIN(StringIndexOf, StringBuiltinsAssembler) {
997   Node* receiver = Parameter(0);
998   Node* search_string = Parameter(1);
999   Node* position = Parameter(2);
1000 
1001   Node* instance_type = LoadInstanceType(receiver);
1002   Node* search_string_instance_type = LoadInstanceType(search_string);
1003 
1004   StringIndexOf(receiver, instance_type, search_string,
1005                 search_string_instance_type, position,
1006                 [this](Node* result) { this->Return(result); });
1007 }
1008 
1009 // ES6 String.prototype.indexOf(searchString [, position])
1010 // #sec-string.prototype.indexof
TF_BUILTIN(StringPrototypeIndexOf,StringBuiltinsAssembler)1011 TF_BUILTIN(StringPrototypeIndexOf, StringBuiltinsAssembler) {
1012   Variable search_string(this, MachineRepresentation::kTagged),
1013       position(this, MachineRepresentation::kTagged);
1014   Label call_runtime(this), call_runtime_unchecked(this), argc_0(this),
1015       no_argc_0(this), argc_1(this), no_argc_1(this), argc_2(this),
1016       fast_path(this), return_minus_1(this);
1017 
1018   Node* argc = Parameter(BuiltinDescriptor::kArgumentsCount);
1019   Node* context = Parameter(BuiltinDescriptor::kContext);
1020 
1021   CodeStubArguments arguments(this, ChangeInt32ToIntPtr(argc));
1022   Node* receiver = arguments.GetReceiver();
1023   // From now on use word-size argc value.
1024   argc = arguments.GetLength();
1025 
1026   GotoIf(IntPtrEqual(argc, IntPtrConstant(0)), &argc_0);
1027   GotoIf(IntPtrEqual(argc, IntPtrConstant(1)), &argc_1);
1028   Goto(&argc_2);
1029   Bind(&argc_0);
1030   {
1031     Comment("0 Argument case");
1032     Node* undefined = UndefinedConstant();
1033     search_string.Bind(undefined);
1034     position.Bind(undefined);
1035     Goto(&call_runtime);
1036   }
1037   Bind(&argc_1);
1038   {
1039     Comment("1 Argument case");
1040     search_string.Bind(arguments.AtIndex(0));
1041     position.Bind(SmiConstant(0));
1042     Goto(&fast_path);
1043   }
1044   Bind(&argc_2);
1045   {
1046     Comment("2 Argument case");
1047     search_string.Bind(arguments.AtIndex(0));
1048     position.Bind(arguments.AtIndex(1));
1049     GotoIfNot(TaggedIsSmi(position.value()), &call_runtime);
1050     Goto(&fast_path);
1051   }
1052 
1053   Bind(&fast_path);
1054   {
1055     Comment("Fast Path");
1056     GotoIf(TaggedIsSmi(receiver), &call_runtime);
1057     Node* needle = search_string.value();
1058     GotoIf(TaggedIsSmi(needle), &call_runtime);
1059 
1060     Node* instance_type = LoadInstanceType(receiver);
1061     GotoIfNot(IsStringInstanceType(instance_type), &call_runtime);
1062 
1063     Node* needle_instance_type = LoadInstanceType(needle);
1064     GotoIfNot(IsStringInstanceType(needle_instance_type), &call_runtime);
1065 
1066     StringIndexOf(
1067         receiver, instance_type, needle, needle_instance_type, position.value(),
1068         [&arguments](Node* result) { arguments.PopAndReturn(result); });
1069   }
1070 
1071   Bind(&call_runtime);
1072   {
1073     Comment("Call Runtime");
1074     Node* result = CallRuntime(Runtime::kStringIndexOf, context, receiver,
1075                                search_string.value(), position.value());
1076     arguments.PopAndReturn(result);
1077   }
1078 }
1079 
1080 // ES6 section 21.1.3.9
1081 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf)1082 BUILTIN(StringPrototypeLastIndexOf) {
1083   HandleScope handle_scope(isolate);
1084   return String::LastIndexOf(isolate, args.receiver(),
1085                              args.atOrUndefined(isolate, 1),
1086                              args.atOrUndefined(isolate, 2));
1087 }
1088 
1089 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
1090 //
1091 // This function is implementation specific.  For now, we do not
1092 // do anything locale specific.
1093 // If internationalization is enabled, then i18n.js will override this function
1094 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeLocaleCompare)1095 BUILTIN(StringPrototypeLocaleCompare) {
1096   HandleScope handle_scope(isolate);
1097   DCHECK_EQ(2, args.length());
1098 
1099   TO_THIS_STRING(str1, "String.prototype.localeCompare");
1100   Handle<String> str2;
1101   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
1102                                      Object::ToString(isolate, args.at(1)));
1103 
1104   if (str1.is_identical_to(str2)) return Smi::kZero;  // Equal.
1105   int str1_length = str1->length();
1106   int str2_length = str2->length();
1107 
1108   // Decide trivial cases without flattening.
1109   if (str1_length == 0) {
1110     if (str2_length == 0) return Smi::kZero;  // Equal.
1111     return Smi::FromInt(-str2_length);
1112   } else {
1113     if (str2_length == 0) return Smi::FromInt(str1_length);
1114   }
1115 
1116   int end = str1_length < str2_length ? str1_length : str2_length;
1117 
1118   // No need to flatten if we are going to find the answer on the first
1119   // character. At this point we know there is at least one character
1120   // in each string, due to the trivial case handling above.
1121   int d = str1->Get(0) - str2->Get(0);
1122   if (d != 0) return Smi::FromInt(d);
1123 
1124   str1 = String::Flatten(str1);
1125   str2 = String::Flatten(str2);
1126 
1127   DisallowHeapAllocation no_gc;
1128   String::FlatContent flat1 = str1->GetFlatContent();
1129   String::FlatContent flat2 = str2->GetFlatContent();
1130 
1131   for (int i = 0; i < end; i++) {
1132     if (flat1.Get(i) != flat2.Get(i)) {
1133       return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
1134     }
1135   }
1136 
1137   return Smi::FromInt(str1_length - str2_length);
1138 }
1139 
1140 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
1141 //
1142 // Simply checks the argument is valid and returns the string itself.
1143 // If internationalization is enabled, then i18n.js will override this function
1144 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize)1145 BUILTIN(StringPrototypeNormalize) {
1146   HandleScope handle_scope(isolate);
1147   TO_THIS_STRING(string, "String.prototype.normalize");
1148 
1149   Handle<Object> form_input = args.atOrUndefined(isolate, 1);
1150   if (form_input->IsUndefined(isolate)) return *string;
1151 
1152   Handle<String> form;
1153   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
1154                                      Object::ToString(isolate, form_input));
1155 
1156   if (!(String::Equals(form,
1157                        isolate->factory()->NewStringFromStaticChars("NFC")) ||
1158         String::Equals(form,
1159                        isolate->factory()->NewStringFromStaticChars("NFD")) ||
1160         String::Equals(form,
1161                        isolate->factory()->NewStringFromStaticChars("NFKC")) ||
1162         String::Equals(form,
1163                        isolate->factory()->NewStringFromStaticChars("NFKD")))) {
1164     Handle<String> valid_forms =
1165         isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1166     THROW_NEW_ERROR_RETURN_FAILURE(
1167         isolate,
1168         NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
1169   }
1170 
1171   return *string;
1172 }
1173 
IsNullOrUndefined(Node * const value)1174 compiler::Node* StringBuiltinsAssembler::IsNullOrUndefined(Node* const value) {
1175   return Word32Or(IsUndefined(value), IsNull(value));
1176 }
1177 
RequireObjectCoercible(Node * const context,Node * const value,const char * method_name)1178 void StringBuiltinsAssembler::RequireObjectCoercible(Node* const context,
1179                                                      Node* const value,
1180                                                      const char* method_name) {
1181   Label out(this), throw_exception(this, Label::kDeferred);
1182   Branch(IsNullOrUndefined(value), &throw_exception, &out);
1183 
1184   Bind(&throw_exception);
1185   TailCallRuntime(
1186       Runtime::kThrowCalledOnNullOrUndefined, context,
1187       HeapConstant(factory()->NewStringFromAsciiChecked(method_name, TENURED)));
1188 
1189   Bind(&out);
1190 }
1191 
MaybeCallFunctionAtSymbol(Node * const context,Node * const object,Node * const maybe_string,Handle<Symbol> symbol,const NodeFunction0 & regexp_call,const NodeFunction1 & generic_call)1192 void StringBuiltinsAssembler::MaybeCallFunctionAtSymbol(
1193     Node* const context, Node* const object, Node* const maybe_string,
1194     Handle<Symbol> symbol, const NodeFunction0& regexp_call,
1195     const NodeFunction1& generic_call) {
1196   Label out(this);
1197 
1198   // Smis definitely don't have an attached symbol.
1199   GotoIf(TaggedIsSmi(object), &out);
1200 
1201   Node* const object_map = LoadMap(object);
1202 
1203   // Skip the slow lookup for Strings.
1204   {
1205     Label next(this);
1206 
1207     GotoIfNot(IsStringInstanceType(LoadMapInstanceType(object_map)), &next);
1208 
1209     Node* const native_context = LoadNativeContext(context);
1210     Node* const initial_proto_initial_map = LoadContextElement(
1211         native_context, Context::STRING_FUNCTION_PROTOTYPE_MAP_INDEX);
1212 
1213     Node* const string_fun =
1214         LoadContextElement(native_context, Context::STRING_FUNCTION_INDEX);
1215     Node* const initial_map =
1216         LoadObjectField(string_fun, JSFunction::kPrototypeOrInitialMapOffset);
1217     Node* const proto_map = LoadMap(LoadMapPrototype(initial_map));
1218 
1219     Branch(WordEqual(proto_map, initial_proto_initial_map), &out, &next);
1220 
1221     Bind(&next);
1222   }
1223 
1224   // Take the fast path for RegExps.
1225   // There's two conditions: {object} needs to be a fast regexp, and
1226   // {maybe_string} must be a string (we can't call ToString on the fast path
1227   // since it may mutate {object}).
1228   {
1229     Label stub_call(this), slow_lookup(this);
1230 
1231     GotoIf(TaggedIsSmi(maybe_string), &slow_lookup);
1232     GotoIfNot(IsString(maybe_string), &slow_lookup);
1233 
1234     RegExpBuiltinsAssembler regexp_asm(state());
1235     regexp_asm.BranchIfFastRegExp(context, object, object_map, &stub_call,
1236                                   &slow_lookup);
1237 
1238     Bind(&stub_call);
1239     Return(regexp_call());
1240 
1241     Bind(&slow_lookup);
1242   }
1243 
1244   GotoIf(IsNullOrUndefined(object), &out);
1245 
1246   // Fall back to a slow lookup of {object[symbol]}.
1247 
1248   Callable getproperty_callable = CodeFactory::GetProperty(isolate());
1249   Node* const key = HeapConstant(symbol);
1250   Node* const maybe_func = CallStub(getproperty_callable, context, object, key);
1251 
1252   GotoIf(IsUndefined(maybe_func), &out);
1253 
1254   // Attempt to call the function.
1255 
1256   Node* const result = generic_call(maybe_func);
1257   Return(result);
1258 
1259   Bind(&out);
1260 }
1261 
1262 // ES6 section 21.1.3.16 String.prototype.replace ( search, replace )
TF_BUILTIN(StringPrototypeReplace,StringBuiltinsAssembler)1263 TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
1264   Label out(this);
1265 
1266   Node* const receiver = Parameter(0);
1267   Node* const search = Parameter(1);
1268   Node* const replace = Parameter(2);
1269   Node* const context = Parameter(5);
1270 
1271   Node* const smi_zero = SmiConstant(0);
1272 
1273   RequireObjectCoercible(context, receiver, "String.prototype.replace");
1274 
1275   // Redirect to replacer method if {search[@@replace]} is not undefined.
1276 
1277   MaybeCallFunctionAtSymbol(
1278       context, search, receiver, isolate()->factory()->replace_symbol(),
1279       [=]() {
1280         Callable tostring_callable = CodeFactory::ToString(isolate());
1281         Node* const subject_string =
1282             CallStub(tostring_callable, context, receiver);
1283 
1284         Callable replace_callable = CodeFactory::RegExpReplace(isolate());
1285         return CallStub(replace_callable, context, search, subject_string,
1286                         replace);
1287       },
1288       [=](Node* fn) {
1289         Callable call_callable = CodeFactory::Call(isolate());
1290         return CallJS(call_callable, context, fn, search, receiver, replace);
1291       });
1292 
1293   // Convert {receiver} and {search} to strings.
1294 
1295   Callable tostring_callable = CodeFactory::ToString(isolate());
1296   Callable indexof_callable = CodeFactory::StringIndexOf(isolate());
1297 
1298   Node* const subject_string = CallStub(tostring_callable, context, receiver);
1299   Node* const search_string = CallStub(tostring_callable, context, search);
1300 
1301   Node* const subject_length = LoadStringLength(subject_string);
1302   Node* const search_length = LoadStringLength(search_string);
1303 
1304   // Fast-path single-char {search}, long {receiver}, and simple string
1305   // {replace}.
1306   {
1307     Label next(this);
1308 
1309     GotoIfNot(SmiEqual(search_length, SmiConstant(1)), &next);
1310     GotoIfNot(SmiGreaterThan(subject_length, SmiConstant(0xFF)), &next);
1311     GotoIf(TaggedIsSmi(replace), &next);
1312     GotoIfNot(IsString(replace), &next);
1313 
1314     Node* const dollar_string = HeapConstant(
1315         isolate()->factory()->LookupSingleCharacterStringFromCode('$'));
1316     Node* const dollar_ix =
1317         CallStub(indexof_callable, context, replace, dollar_string, smi_zero);
1318     GotoIfNot(SmiIsNegative(dollar_ix), &next);
1319 
1320     // Searching by traversing a cons string tree and replace with cons of
1321     // slices works only when the replaced string is a single character, being
1322     // replaced by a simple string and only pays off for long strings.
1323     // TODO(jgruber): Reevaluate if this is still beneficial.
1324     // TODO(jgruber): TailCallRuntime when it correctly handles adapter frames.
1325     Return(CallRuntime(Runtime::kStringReplaceOneCharWithString, context,
1326                        subject_string, search_string, replace));
1327 
1328     Bind(&next);
1329   }
1330 
1331   // TODO(jgruber): Extend StringIndexOf to handle two-byte strings and
1332   // longer substrings - we can handle up to 8 chars (one-byte) / 4 chars
1333   // (2-byte).
1334 
1335   Node* const match_start_index = CallStub(
1336       indexof_callable, context, subject_string, search_string, smi_zero);
1337   CSA_ASSERT(this, TaggedIsSmi(match_start_index));
1338 
1339   // Early exit if no match found.
1340   {
1341     Label next(this), return_subject(this);
1342 
1343     GotoIfNot(SmiIsNegative(match_start_index), &next);
1344 
1345     // The spec requires to perform ToString(replace) if the {replace} is not
1346     // callable even if we are going to exit here.
1347     // Since ToString() being applied to Smi does not have side effects for
1348     // numbers we can skip it.
1349     GotoIf(TaggedIsSmi(replace), &return_subject);
1350     GotoIf(IsCallableMap(LoadMap(replace)), &return_subject);
1351 
1352     // TODO(jgruber): Could introduce ToStringSideeffectsStub which only
1353     // performs observable parts of ToString.
1354     CallStub(tostring_callable, context, replace);
1355     Goto(&return_subject);
1356 
1357     Bind(&return_subject);
1358     Return(subject_string);
1359 
1360     Bind(&next);
1361   }
1362 
1363   Node* const match_end_index = SmiAdd(match_start_index, search_length);
1364 
1365   Callable substring_callable = CodeFactory::SubString(isolate());
1366   Callable stringadd_callable =
1367       CodeFactory::StringAdd(isolate(), STRING_ADD_CHECK_NONE, NOT_TENURED);
1368 
1369   Variable var_result(this, MachineRepresentation::kTagged,
1370                       EmptyStringConstant());
1371 
1372   // Compute the prefix.
1373   {
1374     Label next(this);
1375 
1376     GotoIf(SmiEqual(match_start_index, smi_zero), &next);
1377     Node* const prefix = CallStub(substring_callable, context, subject_string,
1378                                   smi_zero, match_start_index);
1379     var_result.Bind(prefix);
1380 
1381     Goto(&next);
1382     Bind(&next);
1383   }
1384 
1385   // Compute the string to replace with.
1386 
1387   Label if_iscallablereplace(this), if_notcallablereplace(this);
1388   GotoIf(TaggedIsSmi(replace), &if_notcallablereplace);
1389   Branch(IsCallableMap(LoadMap(replace)), &if_iscallablereplace,
1390          &if_notcallablereplace);
1391 
1392   Bind(&if_iscallablereplace);
1393   {
1394     Callable call_callable = CodeFactory::Call(isolate());
1395     Node* const replacement =
1396         CallJS(call_callable, context, replace, UndefinedConstant(),
1397                search_string, match_start_index, subject_string);
1398     Node* const replacement_string =
1399         CallStub(tostring_callable, context, replacement);
1400     var_result.Bind(CallStub(stringadd_callable, context, var_result.value(),
1401                              replacement_string));
1402     Goto(&out);
1403   }
1404 
1405   Bind(&if_notcallablereplace);
1406   {
1407     Node* const replace_string = CallStub(tostring_callable, context, replace);
1408 
1409     // TODO(jgruber): Simplified GetSubstitution implementation in CSA.
1410     Node* const matched = CallStub(substring_callable, context, subject_string,
1411                                    match_start_index, match_end_index);
1412     Node* const replacement_string =
1413         CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
1414                     match_start_index, replace_string);
1415     var_result.Bind(CallStub(stringadd_callable, context, var_result.value(),
1416                              replacement_string));
1417     Goto(&out);
1418   }
1419 
1420   Bind(&out);
1421   {
1422     Node* const suffix = CallStub(substring_callable, context, subject_string,
1423                                   match_end_index, subject_length);
1424     Node* const result =
1425         CallStub(stringadd_callable, context, var_result.value(), suffix);
1426     Return(result);
1427   }
1428 }
1429 
1430 // ES6 section 21.1.3.19 String.prototype.split ( separator, limit )
TF_BUILTIN(StringPrototypeSplit,StringBuiltinsAssembler)1431 TF_BUILTIN(StringPrototypeSplit, StringBuiltinsAssembler) {
1432   Label out(this);
1433 
1434   Node* const receiver = Parameter(0);
1435   Node* const separator = Parameter(1);
1436   Node* const limit = Parameter(2);
1437   Node* const context = Parameter(5);
1438 
1439   Node* const smi_zero = SmiConstant(0);
1440 
1441   RequireObjectCoercible(context, receiver, "String.prototype.split");
1442 
1443   // Redirect to splitter method if {separator[@@split]} is not undefined.
1444 
1445   MaybeCallFunctionAtSymbol(
1446       context, separator, receiver, isolate()->factory()->split_symbol(),
1447       [=]() {
1448         Callable tostring_callable = CodeFactory::ToString(isolate());
1449         Node* const subject_string =
1450             CallStub(tostring_callable, context, receiver);
1451 
1452         Callable split_callable = CodeFactory::RegExpSplit(isolate());
1453         return CallStub(split_callable, context, separator, subject_string,
1454                         limit);
1455       },
1456       [=](Node* fn) {
1457         Callable call_callable = CodeFactory::Call(isolate());
1458         return CallJS(call_callable, context, fn, separator, receiver, limit);
1459       });
1460 
1461   // String and integer conversions.
1462   // TODO(jgruber): The old implementation used Uint32Max instead of SmiMax -
1463   // but AFAIK there should not be a difference since arrays are capped at Smi
1464   // lengths.
1465 
1466   Callable tostring_callable = CodeFactory::ToString(isolate());
1467   Node* const subject_string = CallStub(tostring_callable, context, receiver);
1468   Node* const limit_number =
1469       Select(IsUndefined(limit), [=]() { return SmiConstant(Smi::kMaxValue); },
1470              [=]() { return ToUint32(context, limit); },
1471              MachineRepresentation::kTagged);
1472   Node* const separator_string =
1473       CallStub(tostring_callable, context, separator);
1474 
1475   // Shortcut for {limit} == 0.
1476   {
1477     Label next(this);
1478     GotoIfNot(SmiEqual(limit_number, smi_zero), &next);
1479 
1480     const ElementsKind kind = FAST_ELEMENTS;
1481     Node* const native_context = LoadNativeContext(context);
1482     Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
1483 
1484     Node* const length = smi_zero;
1485     Node* const capacity = IntPtrConstant(0);
1486     Node* const result = AllocateJSArray(kind, array_map, capacity, length);
1487 
1488     Return(result);
1489 
1490     Bind(&next);
1491   }
1492 
1493   // ECMA-262 says that if {separator} is undefined, the result should
1494   // be an array of size 1 containing the entire string.
1495   {
1496     Label next(this);
1497     GotoIfNot(IsUndefined(separator), &next);
1498 
1499     const ElementsKind kind = FAST_ELEMENTS;
1500     Node* const native_context = LoadNativeContext(context);
1501     Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
1502 
1503     Node* const length = SmiConstant(1);
1504     Node* const capacity = IntPtrConstant(1);
1505     Node* const result = AllocateJSArray(kind, array_map, capacity, length);
1506 
1507     Node* const fixed_array = LoadElements(result);
1508     StoreFixedArrayElement(fixed_array, 0, subject_string);
1509 
1510     Return(result);
1511 
1512     Bind(&next);
1513   }
1514 
1515   // If the separator string is empty then return the elements in the subject.
1516   {
1517     Label next(this);
1518     GotoIfNot(SmiEqual(LoadStringLength(separator_string), smi_zero), &next);
1519 
1520     Node* const result = CallRuntime(Runtime::kStringToArray, context,
1521                                      subject_string, limit_number);
1522     Return(result);
1523 
1524     Bind(&next);
1525   }
1526 
1527   Node* const result =
1528       CallRuntime(Runtime::kStringSplit, context, subject_string,
1529                   separator_string, limit_number);
1530   Return(result);
1531 }
1532 
1533 // ES6 section B.2.3.1 String.prototype.substr ( start, length )
TF_BUILTIN(StringPrototypeSubstr,CodeStubAssembler)1534 TF_BUILTIN(StringPrototypeSubstr, CodeStubAssembler) {
1535   Label out(this), handle_length(this);
1536 
1537   Variable var_start(this, MachineRepresentation::kTagged);
1538   Variable var_length(this, MachineRepresentation::kTagged);
1539 
1540   Node* const receiver = Parameter(0);
1541   Node* const start = Parameter(1);
1542   Node* const length = Parameter(2);
1543   Node* const context = Parameter(5);
1544 
1545   Node* const zero = SmiConstant(Smi::kZero);
1546 
1547   // Check that {receiver} is coercible to Object and convert it to a String.
1548   Node* const string =
1549       ToThisString(context, receiver, "String.prototype.substr");
1550 
1551   Node* const string_length = LoadStringLength(string);
1552 
1553   // Conversions and bounds-checks for {start}.
1554   {
1555     Node* const start_int =
1556         ToInteger(context, start, CodeStubAssembler::kTruncateMinusZero);
1557 
1558     Label if_issmi(this), if_isheapnumber(this, Label::kDeferred);
1559     Branch(TaggedIsSmi(start_int), &if_issmi, &if_isheapnumber);
1560 
1561     Bind(&if_issmi);
1562     {
1563       Node* const length_plus_start = SmiAdd(string_length, start_int);
1564       var_start.Bind(Select(SmiLessThan(start_int, zero),
1565                             [&] { return SmiMax(length_plus_start, zero); },
1566                             [&] { return start_int; },
1567                             MachineRepresentation::kTagged));
1568       Goto(&handle_length);
1569     }
1570 
1571     Bind(&if_isheapnumber);
1572     {
1573       // If {start} is a heap number, it is definitely out of bounds. If it is
1574       // negative, {start} = max({string_length} + {start}),0) = 0'. If it is
1575       // positive, set {start} to {string_length} which ultimately results in
1576       // returning an empty string.
1577       Node* const float_zero = Float64Constant(0.);
1578       Node* const start_float = LoadHeapNumberValue(start_int);
1579       var_start.Bind(SelectTaggedConstant(
1580           Float64LessThan(start_float, float_zero), zero, string_length));
1581       Goto(&handle_length);
1582     }
1583   }
1584 
1585   // Conversions and bounds-checks for {length}.
1586   Bind(&handle_length);
1587   {
1588     Label if_issmi(this), if_isheapnumber(this, Label::kDeferred);
1589 
1590     // Default to {string_length} if {length} is undefined.
1591     {
1592       Label if_isundefined(this, Label::kDeferred), if_isnotundefined(this);
1593       Branch(WordEqual(length, UndefinedConstant()), &if_isundefined,
1594              &if_isnotundefined);
1595 
1596       Bind(&if_isundefined);
1597       var_length.Bind(string_length);
1598       Goto(&if_issmi);
1599 
1600       Bind(&if_isnotundefined);
1601       var_length.Bind(
1602           ToInteger(context, length, CodeStubAssembler::kTruncateMinusZero));
1603     }
1604 
1605     Branch(TaggedIsSmi(var_length.value()), &if_issmi, &if_isheapnumber);
1606 
1607     // Set {length} to min(max({length}, 0), {string_length} - {start}
1608     Bind(&if_issmi);
1609     {
1610       Node* const positive_length = SmiMax(var_length.value(), zero);
1611 
1612       Node* const minimal_length = SmiSub(string_length, var_start.value());
1613       var_length.Bind(SmiMin(positive_length, minimal_length));
1614 
1615       GotoIfNot(SmiLessThanOrEqual(var_length.value(), zero), &out);
1616       Return(EmptyStringConstant());
1617     }
1618 
1619     Bind(&if_isheapnumber);
1620     {
1621       // If {length} is a heap number, it is definitely out of bounds. There are
1622       // two cases according to the spec: if it is negative, "" is returned; if
1623       // it is positive, then length is set to {string_length} - {start}.
1624 
1625       CSA_ASSERT(this, IsHeapNumberMap(LoadMap(var_length.value())));
1626 
1627       Label if_isnegative(this), if_ispositive(this);
1628       Node* const float_zero = Float64Constant(0.);
1629       Node* const length_float = LoadHeapNumberValue(var_length.value());
1630       Branch(Float64LessThan(length_float, float_zero), &if_isnegative,
1631              &if_ispositive);
1632 
1633       Bind(&if_isnegative);
1634       Return(EmptyStringConstant());
1635 
1636       Bind(&if_ispositive);
1637       {
1638         var_length.Bind(SmiSub(string_length, var_start.value()));
1639         GotoIfNot(SmiLessThanOrEqual(var_length.value(), zero), &out);
1640         Return(EmptyStringConstant());
1641       }
1642     }
1643   }
1644 
1645   Bind(&out);
1646   {
1647     Node* const end = SmiAdd(var_start.value(), var_length.value());
1648     Node* const result = SubString(context, string, var_start.value(), end);
1649     Return(result);
1650   }
1651 }
1652 
ToSmiBetweenZeroAnd(Node * context,Node * value,Node * limit)1653 compiler::Node* StringBuiltinsAssembler::ToSmiBetweenZeroAnd(Node* context,
1654                                                              Node* value,
1655                                                              Node* limit) {
1656   Label out(this);
1657   Variable var_result(this, MachineRepresentation::kTagged);
1658 
1659   Node* const value_int =
1660       this->ToInteger(context, value, CodeStubAssembler::kTruncateMinusZero);
1661 
1662   Label if_issmi(this), if_isnotsmi(this, Label::kDeferred);
1663   Branch(TaggedIsSmi(value_int), &if_issmi, &if_isnotsmi);
1664 
1665   Bind(&if_issmi);
1666   {
1667     Label if_isinbounds(this), if_isoutofbounds(this, Label::kDeferred);
1668     Branch(SmiAbove(value_int, limit), &if_isoutofbounds, &if_isinbounds);
1669 
1670     Bind(&if_isinbounds);
1671     {
1672       var_result.Bind(value_int);
1673       Goto(&out);
1674     }
1675 
1676     Bind(&if_isoutofbounds);
1677     {
1678       Node* const zero = SmiConstant(Smi::kZero);
1679       var_result.Bind(
1680           SelectTaggedConstant(SmiLessThan(value_int, zero), zero, limit));
1681       Goto(&out);
1682     }
1683   }
1684 
1685   Bind(&if_isnotsmi);
1686   {
1687     // {value} is a heap number - in this case, it is definitely out of bounds.
1688     CSA_ASSERT(this, IsHeapNumberMap(LoadMap(value_int)));
1689 
1690     Node* const float_zero = Float64Constant(0.);
1691     Node* const smi_zero = SmiConstant(Smi::kZero);
1692     Node* const value_float = LoadHeapNumberValue(value_int);
1693     var_result.Bind(SelectTaggedConstant(
1694         Float64LessThan(value_float, float_zero), smi_zero, limit));
1695     Goto(&out);
1696   }
1697 
1698   Bind(&out);
1699   return var_result.value();
1700 }
1701 
1702 // ES6 section 21.1.3.19 String.prototype.substring ( start, end )
TF_BUILTIN(StringPrototypeSubstring,StringBuiltinsAssembler)1703 TF_BUILTIN(StringPrototypeSubstring, StringBuiltinsAssembler) {
1704   Label out(this);
1705 
1706   Variable var_start(this, MachineRepresentation::kTagged);
1707   Variable var_end(this, MachineRepresentation::kTagged);
1708 
1709   Node* const receiver = Parameter(0);
1710   Node* const start = Parameter(1);
1711   Node* const end = Parameter(2);
1712   Node* const context = Parameter(5);
1713 
1714   // Check that {receiver} is coercible to Object and convert it to a String.
1715   Node* const string =
1716       ToThisString(context, receiver, "String.prototype.substring");
1717 
1718   Node* const length = LoadStringLength(string);
1719 
1720   // Conversion and bounds-checks for {start}.
1721   var_start.Bind(ToSmiBetweenZeroAnd(context, start, length));
1722 
1723   // Conversion and bounds-checks for {end}.
1724   {
1725     var_end.Bind(length);
1726     GotoIf(WordEqual(end, UndefinedConstant()), &out);
1727 
1728     var_end.Bind(ToSmiBetweenZeroAnd(context, end, length));
1729 
1730     Label if_endislessthanstart(this);
1731     Branch(SmiLessThan(var_end.value(), var_start.value()),
1732            &if_endislessthanstart, &out);
1733 
1734     Bind(&if_endislessthanstart);
1735     {
1736       Node* const tmp = var_end.value();
1737       var_end.Bind(var_start.value());
1738       var_start.Bind(tmp);
1739       Goto(&out);
1740     }
1741   }
1742 
1743   Bind(&out);
1744   {
1745     Node* result =
1746         SubString(context, string, var_start.value(), var_end.value());
1747     Return(result);
1748   }
1749 }
1750 
// String.prototype.startsWith ( searchString [ , position ] )
//
// Returns true if the receiver string contains {searchString} starting at
// {position} (default 0). Throws a TypeError when {searchString} is a RegExp.
BUILTIN(StringPrototypeStartsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(str, "String.prototype.startsWith");

  // A RegExp as the search argument is rejected; a failing IsRegExp check
  // leaves a pending exception that is propagated.
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
  if (is_reg_exp.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (is_reg_exp.FromJust()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
                              isolate->factory()->NewStringFromStaticChars(
                                  "String.prototype.startsWith")));
  }
  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  // An undefined {position} means the comparison starts at index 0.
  int start = 0;
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  if (!position->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    start = str->ToValidIndex(*position);
  }

  // The search string cannot match if it would run past the end of {str}.
  const int search_length = search_string->length();
  if (start + search_length > str->length()) {
    return isolate->heap()->false_value();
  }

  FlatStringReader str_reader(isolate, String::Flatten(str));
  FlatStringReader search_reader(isolate, String::Flatten(search_string));

  // Compare character by character; the first mismatch decides.
  for (int offset = 0; offset < search_length; offset++) {
    if (str_reader.Get(start + offset) != search_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
1797 
1798 // ES6 section 21.1.3.25 String.prototype.toString ()
TF_BUILTIN(StringPrototypeToString,CodeStubAssembler)1799 TF_BUILTIN(StringPrototypeToString, CodeStubAssembler) {
1800   Node* receiver = Parameter(0);
1801   Node* context = Parameter(3);
1802 
1803   Node* result = ToThisValue(context, receiver, PrimitiveType::kString,
1804                              "String.prototype.toString");
1805   Return(result);
1806 }
1807 
1808 // ES6 section 21.1.3.27 String.prototype.trim ()
BUILTIN(StringPrototypeTrim)1809 BUILTIN(StringPrototypeTrim) {
1810   HandleScope scope(isolate);
1811   TO_THIS_STRING(string, "String.prototype.trim");
1812   return *String::Trim(string, String::kTrim);
1813 }
1814 
1815 // Non-standard WebKit extension
BUILTIN(StringPrototypeTrimLeft)1816 BUILTIN(StringPrototypeTrimLeft) {
1817   HandleScope scope(isolate);
1818   TO_THIS_STRING(string, "String.prototype.trimLeft");
1819   return *String::Trim(string, String::kTrimLeft);
1820 }
1821 
1822 // Non-standard WebKit extension
BUILTIN(StringPrototypeTrimRight)1823 BUILTIN(StringPrototypeTrimRight) {
1824   HandleScope scope(isolate);
1825   TO_THIS_STRING(string, "String.prototype.trimRight");
1826   return *String::Trim(string, String::kTrimRight);
1827 }
1828 
1829 // ES6 section 21.1.3.28 String.prototype.valueOf ( )
TF_BUILTIN(StringPrototypeValueOf,CodeStubAssembler)1830 TF_BUILTIN(StringPrototypeValueOf, CodeStubAssembler) {
1831   Node* receiver = Parameter(0);
1832   Node* context = Parameter(3);
1833 
1834   Node* result = ToThisValue(context, receiver, PrimitiveType::kString,
1835                              "String.prototype.valueOf");
1836   Return(result);
1837 }
1838 
// String.prototype[Symbol.iterator]: allocates a JSStringIterator over the
// receiver (converted to a string) with its next index set to zero.
TF_BUILTIN(StringPrototypeIterator, CodeStubAssembler) {
  Node* const receiver = Parameter(0);
  Node* const context = Parameter(3);

  Node* const subject =
      ToThisString(context, receiver, "String.prototype[Symbol.iterator]");

  // The iterator's map lives in the native context.
  Node* const native_context = LoadNativeContext(context);
  Node* const iterator_map =
      LoadContextElement(native_context, Context::STRING_ITERATOR_MAP_INDEX);

  // Allocate the iterator and initialize every field by hand.
  Node* const iterator = Allocate(JSStringIterator::kSize);
  StoreMapNoWriteBarrier(iterator, iterator_map);
  StoreObjectFieldRoot(iterator, JSValue::kPropertiesOffset,
                       Heap::kEmptyFixedArrayRootIndex);
  StoreObjectFieldRoot(iterator, JSObject::kElementsOffset,
                       Heap::kEmptyFixedArrayRootIndex);
  StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kStringOffset,
                                 subject);
  Node* const first_index = SmiConstant(Smi::kZero);
  StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kNextIndexOffset,
                                 first_index);
  Return(iterator);
}
1862 
1863 // Return the |word32| codepoint at {index}. Supports SeqStrings and
1864 // ExternalStrings.
LoadSurrogatePairAt(compiler::Node * string,compiler::Node * length,compiler::Node * index,UnicodeEncoding encoding)1865 compiler::Node* StringBuiltinsAssembler::LoadSurrogatePairAt(
1866     compiler::Node* string, compiler::Node* length, compiler::Node* index,
1867     UnicodeEncoding encoding) {
1868   Label handle_surrogate_pair(this), return_result(this);
1869   Variable var_result(this, MachineRepresentation::kWord32);
1870   Variable var_trail(this, MachineRepresentation::kWord32);
1871   var_result.Bind(StringCharCodeAt(string, index));
1872   var_trail.Bind(Int32Constant(0));
1873 
1874   GotoIf(Word32NotEqual(Word32And(var_result.value(), Int32Constant(0xFC00)),
1875                         Int32Constant(0xD800)),
1876          &return_result);
1877   Node* next_index = SmiAdd(index, SmiConstant(Smi::FromInt(1)));
1878 
1879   GotoIfNot(SmiLessThan(next_index, length), &return_result);
1880   var_trail.Bind(StringCharCodeAt(string, next_index));
1881   Branch(Word32Equal(Word32And(var_trail.value(), Int32Constant(0xFC00)),
1882                      Int32Constant(0xDC00)),
1883          &handle_surrogate_pair, &return_result);
1884 
1885   Bind(&handle_surrogate_pair);
1886   {
1887     Node* lead = var_result.value();
1888     Node* trail = var_trail.value();
1889 
1890     // Check that this path is only taken if a surrogate pair is found
1891     CSA_SLOW_ASSERT(this,
1892                     Uint32GreaterThanOrEqual(lead, Int32Constant(0xD800)));
1893     CSA_SLOW_ASSERT(this, Uint32LessThan(lead, Int32Constant(0xDC00)));
1894     CSA_SLOW_ASSERT(this,
1895                     Uint32GreaterThanOrEqual(trail, Int32Constant(0xDC00)));
1896     CSA_SLOW_ASSERT(this, Uint32LessThan(trail, Int32Constant(0xE000)));
1897 
1898     switch (encoding) {
1899       case UnicodeEncoding::UTF16:
1900         var_result.Bind(Word32Or(
1901 // Need to swap the order for big-endian platforms
1902 #if V8_TARGET_BIG_ENDIAN
1903             Word32Shl(lead, Int32Constant(16)), trail));
1904 #else
1905             Word32Shl(trail, Int32Constant(16)), lead));
1906 #endif
1907         break;
1908 
1909       case UnicodeEncoding::UTF32: {
1910         // Convert UTF16 surrogate pair into |word32| code point, encoded as
1911         // UTF32.
1912         Node* surrogate_offset =
1913             Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);
1914 
1915         // (lead << 10) + trail + SURROGATE_OFFSET
1916         var_result.Bind(Int32Add(WordShl(lead, Int32Constant(10)),
1917                                  Int32Add(trail, surrogate_offset)));
1918         break;
1919       }
1920     }
1921     Goto(&return_result);
1922   }
1923 
1924   Bind(&return_result);
1925   return var_result.value();
1926 }
1927 
TF_BUILTIN(StringIteratorPrototypeNext,StringBuiltinsAssembler)1928 TF_BUILTIN(StringIteratorPrototypeNext, StringBuiltinsAssembler) {
1929   Variable var_value(this, MachineRepresentation::kTagged);
1930   Variable var_done(this, MachineRepresentation::kTagged);
1931 
1932   var_value.Bind(UndefinedConstant());
1933   var_done.Bind(BooleanConstant(true));
1934 
1935   Label throw_bad_receiver(this), next_codepoint(this), return_result(this);
1936 
1937   Node* iterator = Parameter(0);
1938   Node* context = Parameter(3);
1939 
1940   GotoIf(TaggedIsSmi(iterator), &throw_bad_receiver);
1941   GotoIfNot(Word32Equal(LoadInstanceType(iterator),
1942                         Int32Constant(JS_STRING_ITERATOR_TYPE)),
1943             &throw_bad_receiver);
1944 
1945   Node* string = LoadObjectField(iterator, JSStringIterator::kStringOffset);
1946   Node* position =
1947       LoadObjectField(iterator, JSStringIterator::kNextIndexOffset);
1948   Node* length = LoadObjectField(string, String::kLengthOffset);
1949 
1950   Branch(SmiLessThan(position, length), &next_codepoint, &return_result);
1951 
1952   Bind(&next_codepoint);
1953   {
1954     UnicodeEncoding encoding = UnicodeEncoding::UTF16;
1955     Node* ch = LoadSurrogatePairAt(string, length, position, encoding);
1956     Node* value = StringFromCodePoint(ch, encoding);
1957     var_value.Bind(value);
1958     Node* length = LoadObjectField(value, String::kLengthOffset);
1959     StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kNextIndexOffset,
1960                                    SmiAdd(position, length));
1961     var_done.Bind(BooleanConstant(false));
1962     Goto(&return_result);
1963   }
1964 
1965   Bind(&return_result);
1966   {
1967     Node* native_context = LoadNativeContext(context);
1968     Node* map =
1969         LoadContextElement(native_context, Context::ITERATOR_RESULT_MAP_INDEX);
1970     Node* result = Allocate(JSIteratorResult::kSize);
1971     StoreMapNoWriteBarrier(result, map);
1972     StoreObjectFieldRoot(result, JSIteratorResult::kPropertiesOffset,
1973                          Heap::kEmptyFixedArrayRootIndex);
1974     StoreObjectFieldRoot(result, JSIteratorResult::kElementsOffset,
1975                          Heap::kEmptyFixedArrayRootIndex);
1976     StoreObjectFieldNoWriteBarrier(result, JSIteratorResult::kValueOffset,
1977                                    var_value.value());
1978     StoreObjectFieldNoWriteBarrier(result, JSIteratorResult::kDoneOffset,
1979                                    var_done.value());
1980     Return(result);
1981   }
1982 
1983   Bind(&throw_bad_receiver);
1984   {
1985     // The {receiver} is not a valid JSGeneratorObject.
1986     CallRuntime(Runtime::kThrowIncompatibleMethodReceiver, context,
1987                 HeapConstant(factory()->NewStringFromAsciiChecked(
1988                     "String Iterator.prototype.next", TENURED)),
1989                 iterator);
1990     Unreachable();
1991   }
1992 }
1993 
1994 namespace {
1995 
ToUpperOverflows(uc32 character)1996 inline bool ToUpperOverflows(uc32 character) {
1997   // y with umlauts and the micro sign are the only characters that stop
1998   // fitting into one-byte when converting to uppercase.
1999   static const uc32 yuml_code = 0xff;
2000   static const uc32 micro_code = 0xb5;
2001   return (character == yuml_code || character == micro_code);
2002 }
2003 
2004 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String * string,SeqString * result,int result_length,unibrow::Mapping<Converter,128> * mapping)2005 MUST_USE_RESULT static Object* ConvertCaseHelper(
2006     Isolate* isolate, String* string, SeqString* result, int result_length,
2007     unibrow::Mapping<Converter, 128>* mapping) {
2008   DisallowHeapAllocation no_gc;
2009   // We try this twice, once with the assumption that the result is no longer
2010   // than the input and, if that assumption breaks, again with the exact
2011   // length.  This may not be pretty, but it is nicer than what was here before
2012   // and I hereby claim my vaffel-is.
2013   //
2014   // NOTE: This assumes that the upper/lower case of an ASCII
2015   // character is also ASCII.  This is currently the case, but it
2016   // might break in the future if we implement more context and locale
2017   // dependent upper/lower conversions.
2018   bool has_changed_character = false;
2019 
2020   // Convert all characters to upper case, assuming that they will fit
2021   // in the buffer
2022   StringCharacterStream stream(string);
2023   unibrow::uchar chars[Converter::kMaxWidth];
2024   // We can assume that the string is not empty
2025   uc32 current = stream.GetNext();
2026   bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
2027   for (int i = 0; i < result_length;) {
2028     bool has_next = stream.HasMore();
2029     uc32 next = has_next ? stream.GetNext() : 0;
2030     int char_length = mapping->get(current, next, chars);
2031     if (char_length == 0) {
2032       // The case conversion of this character is the character itself.
2033       result->Set(i, current);
2034       i++;
2035     } else if (char_length == 1 &&
2036                (ignore_overflow || !ToUpperOverflows(current))) {
2037       // Common case: converting the letter resulted in one character.
2038       DCHECK(static_cast<uc32>(chars[0]) != current);
2039       result->Set(i, chars[0]);
2040       has_changed_character = true;
2041       i++;
2042     } else if (result_length == string->length()) {
2043       bool overflows = ToUpperOverflows(current);
2044       // We've assumed that the result would be as long as the
2045       // input but here is a character that converts to several
2046       // characters.  No matter, we calculate the exact length
2047       // of the result and try the whole thing again.
2048       //
2049       // Note that this leaves room for optimization.  We could just
2050       // memcpy what we already have to the result string.  Also,
2051       // the result string is the last object allocated we could
2052       // "realloc" it and probably, in the vast majority of cases,
2053       // extend the existing string to be able to hold the full
2054       // result.
2055       int next_length = 0;
2056       if (has_next) {
2057         next_length = mapping->get(next, 0, chars);
2058         if (next_length == 0) next_length = 1;
2059       }
2060       int current_length = i + char_length + next_length;
2061       while (stream.HasMore()) {
2062         current = stream.GetNext();
2063         overflows |= ToUpperOverflows(current);
2064         // NOTE: we use 0 as the next character here because, while
2065         // the next character may affect what a character converts to,
2066         // it does not in any case affect the length of what it convert
2067         // to.
2068         int char_length = mapping->get(current, 0, chars);
2069         if (char_length == 0) char_length = 1;
2070         current_length += char_length;
2071         if (current_length > String::kMaxLength) {
2072           AllowHeapAllocation allocate_error_and_return;
2073           THROW_NEW_ERROR_RETURN_FAILURE(isolate,
2074                                          NewInvalidStringLengthError());
2075         }
2076       }
2077       // Try again with the real length.  Return signed if we need
2078       // to allocate a two-byte string for to uppercase.
2079       return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
2080                                              : Smi::FromInt(current_length);
2081     } else {
2082       for (int j = 0; j < char_length; j++) {
2083         result->Set(i, chars[j]);
2084         i++;
2085       }
2086       has_changed_character = true;
2087     }
2088     current = next;
2089   }
2090   if (has_changed_character) {
2091     return result;
2092   } else {
2093     // If we didn't actually change anything in doing the conversion
2094     // we simple return the result and let the converted string
2095     // become garbage; there is no reason to keep two identical strings
2096     // alive.
2097     return string;
2098   }
2099 }
2100 
2101 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)2102 MUST_USE_RESULT static Object* ConvertCase(
2103     Handle<String> s, Isolate* isolate,
2104     unibrow::Mapping<Converter, 128>* mapping) {
2105   s = String::Flatten(s);
2106   int length = s->length();
2107   // Assume that the string is not empty; we need this assumption later
2108   if (length == 0) return *s;
2109 
2110   // Simpler handling of ASCII strings.
2111   //
2112   // NOTE: This assumes that the upper/lower case of an ASCII
2113   // character is also ASCII.  This is currently the case, but it
2114   // might break in the future if we implement more context and locale
2115   // dependent upper/lower conversions.
2116   if (s->IsOneByteRepresentationUnderneath()) {
2117     // Same length as input.
2118     Handle<SeqOneByteString> result =
2119         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
2120     DisallowHeapAllocation no_gc;
2121     String::FlatContent flat_content = s->GetFlatContent();
2122     DCHECK(flat_content.IsFlat());
2123     bool has_changed_character = false;
2124     int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
2125         reinterpret_cast<char*>(result->GetChars()),
2126         reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
2127         length, &has_changed_character);
2128     // If not ASCII, we discard the result and take the 2 byte path.
2129     if (index_to_first_unprocessed == length)
2130       return has_changed_character ? *result : *s;
2131   }
2132 
2133   Handle<SeqString> result;  // Same length as input.
2134   if (s->IsOneByteRepresentation()) {
2135     result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
2136   } else {
2137     result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
2138   }
2139 
2140   Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
2141   if (answer->IsException(isolate) || answer->IsString()) return answer;
2142 
2143   DCHECK(answer->IsSmi());
2144   length = Smi::cast(answer)->value();
2145   if (s->IsOneByteRepresentation() && length > 0) {
2146     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
2147         isolate, result, isolate->factory()->NewRawOneByteString(length));
2148   } else {
2149     if (length < 0) length = -length;
2150     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
2151         isolate, result, isolate->factory()->NewRawTwoByteString(length));
2152   }
2153   return ConvertCaseHelper(isolate, *s, *result, length, mapping);
2154 }
2155 
2156 }  // namespace
2157 
// String.prototype.toLocaleLowerCase: converts the receiver to a string and
// lower-cases it with the isolate's to-lower mapping. No argument is read.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(subject, "String.prototype.toLocaleLowerCase");
  auto* const lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(subject, isolate, lower_mapping);
}
2164 
// String.prototype.toLocaleUpperCase: converts the receiver to a string and
// upper-cases it with the isolate's to-upper mapping. No argument is read.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(subject, "String.prototype.toLocaleUpperCase");
  auto* const upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(subject, isolate, upper_mapping);
}
2171 
// String.prototype.toLowerCase: converts the receiver to a string and
// lower-cases it with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(subject, "String.prototype.toLowerCase");
  auto* const lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(subject, isolate, lower_mapping);
}
2178 
// String.prototype.toUpperCase: converts the receiver to a string and
// upper-cases it with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(subject, "String.prototype.toUpperCase");
  auto* const upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(subject, isolate, upper_mapping);
}
2185 
2186 }  // namespace internal
2187 }  // namespace v8
2188