1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-regexp.h"
6 #include "src/builtins/builtins-utils.h"
7 #include "src/builtins/builtins.h"
8 #include "src/code-factory.h"
9 #include "src/code-stub-assembler.h"
10 #include "src/conversions.h"
11 #include "src/counters.h"
12 #include "src/objects-inl.h"
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-case.h"
15 #include "src/unicode-inl.h"
16 #include "src/unicode.h"
17
18 namespace v8 {
19 namespace internal {
20
21 typedef CodeStubAssembler::ResultMode ResultMode;
22 typedef CodeStubAssembler::RelationalComparisonMode RelationalComparisonMode;
23
24 class StringBuiltinsAssembler : public CodeStubAssembler {
25 public:
StringBuiltinsAssembler(compiler::CodeAssemblerState * state)26 explicit StringBuiltinsAssembler(compiler::CodeAssemblerState* state)
27 : CodeStubAssembler(state) {}
28
29 protected:
DirectStringData(Node * string,Node * string_instance_type)30 Node* DirectStringData(Node* string, Node* string_instance_type) {
31 // Compute the effective offset of the first character.
32 Variable var_data(this, MachineType::PointerRepresentation());
33 Label if_sequential(this), if_external(this), if_join(this);
34 Branch(Word32Equal(Word32And(string_instance_type,
35 Int32Constant(kStringRepresentationMask)),
36 Int32Constant(kSeqStringTag)),
37 &if_sequential, &if_external);
38
39 Bind(&if_sequential);
40 {
41 var_data.Bind(IntPtrAdd(
42 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag),
43 BitcastTaggedToWord(string)));
44 Goto(&if_join);
45 }
46
47 Bind(&if_external);
48 {
49 // This is only valid for ExternalStrings where the resource data
50 // pointer is cached (i.e. no short external strings).
51 CSA_ASSERT(this, Word32NotEqual(
52 Word32And(string_instance_type,
53 Int32Constant(kShortExternalStringMask)),
54 Int32Constant(kShortExternalStringTag)));
55 var_data.Bind(LoadObjectField(string, ExternalString::kResourceDataOffset,
56 MachineType::Pointer()));
57 Goto(&if_join);
58 }
59
60 Bind(&if_join);
61 return var_data.value();
62 }
63
LoadOneByteChar(Node * string,Node * index)64 Node* LoadOneByteChar(Node* string, Node* index) {
65 return Load(MachineType::Uint8(), string, OneByteCharOffset(index));
66 }
67
OneByteCharAddress(Node * string,Node * index)68 Node* OneByteCharAddress(Node* string, Node* index) {
69 Node* offset = OneByteCharOffset(index);
70 return IntPtrAdd(string, offset);
71 }
72
OneByteCharOffset(Node * index)73 Node* OneByteCharOffset(Node* index) {
74 return CharOffset(String::ONE_BYTE_ENCODING, index);
75 }
76
CharOffset(String::Encoding encoding,Node * index)77 Node* CharOffset(String::Encoding encoding, Node* index) {
78 const int header = SeqOneByteString::kHeaderSize - kHeapObjectTag;
79 Node* offset = index;
80 if (encoding == String::TWO_BYTE_ENCODING) {
81 offset = IntPtrAdd(offset, offset);
82 }
83 offset = IntPtrAdd(offset, IntPtrConstant(header));
84 return offset;
85 }
86
DispatchOnStringInstanceType(Node * const instance_type,Label * if_onebyte_sequential,Label * if_onebyte_external,Label * if_otherwise)87 void DispatchOnStringInstanceType(Node* const instance_type,
88 Label* if_onebyte_sequential,
89 Label* if_onebyte_external,
90 Label* if_otherwise) {
91 const int kMask = kStringRepresentationMask | kStringEncodingMask;
92 Node* const encoding_and_representation =
93 Word32And(instance_type, Int32Constant(kMask));
94
95 int32_t values[] = {
96 kOneByteStringTag | kSeqStringTag,
97 kOneByteStringTag | kExternalStringTag,
98 };
99 Label* labels[] = {
100 if_onebyte_sequential, if_onebyte_external,
101 };
102 STATIC_ASSERT(arraysize(values) == arraysize(labels));
103
104 Switch(encoding_and_representation, if_otherwise, values, labels,
105 arraysize(values));
106 }
107
108 void GenerateStringEqual(ResultMode mode);
109 void GenerateStringRelationalComparison(RelationalComparisonMode mode);
110
111 Node* ToSmiBetweenZeroAnd(Node* context, Node* value, Node* limit);
112
113 Node* LoadSurrogatePairAt(Node* string, Node* length, Node* index,
114 UnicodeEncoding encoding);
115
116 void StringIndexOf(Node* receiver, Node* instance_type, Node* search_string,
117 Node* search_string_instance_type, Node* position,
118 std::function<void(Node*)> f_return);
119
120 Node* IsNullOrUndefined(Node* const value);
121 void RequireObjectCoercible(Node* const context, Node* const value,
122 const char* method_name);
123
SmiIsNegative(Node * const value)124 Node* SmiIsNegative(Node* const value) {
125 return SmiLessThan(value, SmiConstant(0));
126 }
127
128 // Implements boilerplate logic for {match, split, replace, search} of the
129 // form:
130 //
131 // if (!IS_NULL_OR_UNDEFINED(object)) {
132 // var maybe_function = object[symbol];
133 // if (!IS_UNDEFINED(maybe_function)) {
134 // return %_Call(maybe_function, ...);
135 // }
136 // }
137 //
138 // Contains fast paths for Smi and RegExp objects.
139 typedef std::function<Node*()> NodeFunction0;
140 typedef std::function<Node*(Node* fn)> NodeFunction1;
141 void MaybeCallFunctionAtSymbol(Node* const context, Node* const object,
142 Node* const maybe_string,
143 Handle<Symbol> symbol,
144 const NodeFunction0& regexp_call,
145 const NodeFunction1& generic_call);
146 };
147
GenerateStringEqual(ResultMode mode)148 void StringBuiltinsAssembler::GenerateStringEqual(ResultMode mode) {
149 // Here's pseudo-code for the algorithm below in case of kDontNegateResult
150 // mode; for kNegateResult mode we properly negate the result.
151 //
152 // if (lhs == rhs) return true;
153 // if (lhs->length() != rhs->length()) return false;
154 // if (lhs->IsInternalizedString() && rhs->IsInternalizedString()) {
155 // return false;
156 // }
157 // if (lhs->IsSeqOneByteString() && rhs->IsSeqOneByteString()) {
158 // for (i = 0; i != lhs->length(); ++i) {
159 // if (lhs[i] != rhs[i]) return false;
160 // }
161 // return true;
162 // }
163 // if (lhs and/or rhs are indirect strings) {
164 // unwrap them and restart from the beginning;
165 // }
166 // return %StringEqual(lhs, rhs);
167
168 Variable var_left(this, MachineRepresentation::kTagged);
169 Variable var_right(this, MachineRepresentation::kTagged);
170 var_left.Bind(Parameter(0));
171 var_right.Bind(Parameter(1));
172 Node* context = Parameter(2);
173
174 Variable* input_vars[2] = {&var_left, &var_right};
175 Label if_equal(this), if_notequal(this), restart(this, 2, input_vars);
176 Goto(&restart);
177 Bind(&restart);
178 Node* lhs = var_left.value();
179 Node* rhs = var_right.value();
180
181 // Fast check to see if {lhs} and {rhs} refer to the same String object.
182 GotoIf(WordEqual(lhs, rhs), &if_equal);
183
184 // Load the length of {lhs} and {rhs}.
185 Node* lhs_length = LoadStringLength(lhs);
186 Node* rhs_length = LoadStringLength(rhs);
187
188 // Strings with different lengths cannot be equal.
189 GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
190
191 // Load instance types of {lhs} and {rhs}.
192 Node* lhs_instance_type = LoadInstanceType(lhs);
193 Node* rhs_instance_type = LoadInstanceType(rhs);
194
195 // Combine the instance types into a single 16-bit value, so we can check
196 // both of them at once.
197 Node* both_instance_types = Word32Or(
198 lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
199
200 // Check if both {lhs} and {rhs} are internalized. Since we already know
201 // that they're not the same object, they're not equal in that case.
202 int const kBothInternalizedMask =
203 kIsNotInternalizedMask | (kIsNotInternalizedMask << 8);
204 int const kBothInternalizedTag = kInternalizedTag | (kInternalizedTag << 8);
205 GotoIf(Word32Equal(Word32And(both_instance_types,
206 Int32Constant(kBothInternalizedMask)),
207 Int32Constant(kBothInternalizedTag)),
208 &if_notequal);
209
210 // Check that both {lhs} and {rhs} are flat one-byte strings, and that
211 // in case of ExternalStrings the data pointer is cached..
212 STATIC_ASSERT(kShortExternalStringTag != 0);
213 int const kBothDirectOneByteStringMask =
214 kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask |
215 ((kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask)
216 << 8);
217 int const kBothDirectOneByteStringTag =
218 kOneByteStringTag | (kOneByteStringTag << 8);
219 Label if_bothdirectonebytestrings(this), if_notbothdirectonebytestrings(this);
220 Branch(Word32Equal(Word32And(both_instance_types,
221 Int32Constant(kBothDirectOneByteStringMask)),
222 Int32Constant(kBothDirectOneByteStringTag)),
223 &if_bothdirectonebytestrings, &if_notbothdirectonebytestrings);
224
225 Bind(&if_bothdirectonebytestrings);
226 {
227 // Compute the effective offset of the first character.
228 Node* lhs_data = DirectStringData(lhs, lhs_instance_type);
229 Node* rhs_data = DirectStringData(rhs, rhs_instance_type);
230
231 // Compute the first offset after the string from the length.
232 Node* length = SmiUntag(lhs_length);
233
234 // Loop over the {lhs} and {rhs} strings to see if they are equal.
235 Variable var_offset(this, MachineType::PointerRepresentation());
236 Label loop(this, &var_offset);
237 var_offset.Bind(IntPtrConstant(0));
238 Goto(&loop);
239 Bind(&loop);
240 {
241 // If {offset} equals {end}, no difference was found, so the
242 // strings are equal.
243 Node* offset = var_offset.value();
244 GotoIf(WordEqual(offset, length), &if_equal);
245
246 // Load the next characters from {lhs} and {rhs}.
247 Node* lhs_value = Load(MachineType::Uint8(), lhs_data, offset);
248 Node* rhs_value = Load(MachineType::Uint8(), rhs_data, offset);
249
250 // Check if the characters match.
251 GotoIf(Word32NotEqual(lhs_value, rhs_value), &if_notequal);
252
253 // Advance to next character.
254 var_offset.Bind(IntPtrAdd(offset, IntPtrConstant(1)));
255 Goto(&loop);
256 }
257 }
258
259 Bind(&if_notbothdirectonebytestrings);
260 {
261 // Try to unwrap indirect strings, restart the above attempt on success.
262 MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
263 rhs_instance_type, &restart);
264 // TODO(bmeurer): Add support for two byte string equality checks.
265
266 Runtime::FunctionId function_id = (mode == ResultMode::kDontNegateResult)
267 ? Runtime::kStringEqual
268 : Runtime::kStringNotEqual;
269 TailCallRuntime(function_id, context, lhs, rhs);
270 }
271
272 Bind(&if_equal);
273 Return(BooleanConstant(mode == ResultMode::kDontNegateResult));
274
275 Bind(&if_notequal);
276 Return(BooleanConstant(mode == ResultMode::kNegateResult));
277 }
278
GenerateStringRelationalComparison(RelationalComparisonMode mode)279 void StringBuiltinsAssembler::GenerateStringRelationalComparison(
280 RelationalComparisonMode mode) {
281 Variable var_left(this, MachineRepresentation::kTagged);
282 Variable var_right(this, MachineRepresentation::kTagged);
283 var_left.Bind(Parameter(0));
284 var_right.Bind(Parameter(1));
285 Node* context = Parameter(2);
286
287 Variable* input_vars[2] = {&var_left, &var_right};
288 Label if_less(this), if_equal(this), if_greater(this);
289 Label restart(this, 2, input_vars);
290 Goto(&restart);
291 Bind(&restart);
292
293 Node* lhs = var_left.value();
294 Node* rhs = var_right.value();
295 // Fast check to see if {lhs} and {rhs} refer to the same String object.
296 GotoIf(WordEqual(lhs, rhs), &if_equal);
297
298 // Load instance types of {lhs} and {rhs}.
299 Node* lhs_instance_type = LoadInstanceType(lhs);
300 Node* rhs_instance_type = LoadInstanceType(rhs);
301
302 // Combine the instance types into a single 16-bit value, so we can check
303 // both of them at once.
304 Node* both_instance_types = Word32Or(
305 lhs_instance_type, Word32Shl(rhs_instance_type, Int32Constant(8)));
306
307 // Check that both {lhs} and {rhs} are flat one-byte strings.
308 int const kBothSeqOneByteStringMask =
309 kStringEncodingMask | kStringRepresentationMask |
310 ((kStringEncodingMask | kStringRepresentationMask) << 8);
311 int const kBothSeqOneByteStringTag =
312 kOneByteStringTag | kSeqStringTag |
313 ((kOneByteStringTag | kSeqStringTag) << 8);
314 Label if_bothonebyteseqstrings(this), if_notbothonebyteseqstrings(this);
315 Branch(Word32Equal(Word32And(both_instance_types,
316 Int32Constant(kBothSeqOneByteStringMask)),
317 Int32Constant(kBothSeqOneByteStringTag)),
318 &if_bothonebyteseqstrings, &if_notbothonebyteseqstrings);
319
320 Bind(&if_bothonebyteseqstrings);
321 {
322 // Load the length of {lhs} and {rhs}.
323 Node* lhs_length = LoadStringLength(lhs);
324 Node* rhs_length = LoadStringLength(rhs);
325
326 // Determine the minimum length.
327 Node* length = SmiMin(lhs_length, rhs_length);
328
329 // Compute the effective offset of the first character.
330 Node* begin =
331 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag);
332
333 // Compute the first offset after the string from the length.
334 Node* end = IntPtrAdd(begin, SmiUntag(length));
335
336 // Loop over the {lhs} and {rhs} strings to see if they are equal.
337 Variable var_offset(this, MachineType::PointerRepresentation());
338 Label loop(this, &var_offset);
339 var_offset.Bind(begin);
340 Goto(&loop);
341 Bind(&loop);
342 {
343 // Check if {offset} equals {end}.
344 Node* offset = var_offset.value();
345 Label if_done(this), if_notdone(this);
346 Branch(WordEqual(offset, end), &if_done, &if_notdone);
347
348 Bind(&if_notdone);
349 {
350 // Load the next characters from {lhs} and {rhs}.
351 Node* lhs_value = Load(MachineType::Uint8(), lhs, offset);
352 Node* rhs_value = Load(MachineType::Uint8(), rhs, offset);
353
354 // Check if the characters match.
355 Label if_valueissame(this), if_valueisnotsame(this);
356 Branch(Word32Equal(lhs_value, rhs_value), &if_valueissame,
357 &if_valueisnotsame);
358
359 Bind(&if_valueissame);
360 {
361 // Advance to next character.
362 var_offset.Bind(IntPtrAdd(offset, IntPtrConstant(1)));
363 }
364 Goto(&loop);
365
366 Bind(&if_valueisnotsame);
367 Branch(Uint32LessThan(lhs_value, rhs_value), &if_less, &if_greater);
368 }
369
370 Bind(&if_done);
371 {
372 // All characters up to the min length are equal, decide based on
373 // string length.
374 GotoIf(SmiEqual(lhs_length, rhs_length), &if_equal);
375 BranchIfSmiLessThan(lhs_length, rhs_length, &if_less, &if_greater);
376 }
377 }
378 }
379
380 Bind(&if_notbothonebyteseqstrings);
381 {
382 // Try to unwrap indirect strings, restart the above attempt on success.
383 MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
384 rhs_instance_type, &restart);
385 // TODO(bmeurer): Add support for two byte string relational comparisons.
386 switch (mode) {
387 case RelationalComparisonMode::kLessThan:
388 TailCallRuntime(Runtime::kStringLessThan, context, lhs, rhs);
389 break;
390 case RelationalComparisonMode::kLessThanOrEqual:
391 TailCallRuntime(Runtime::kStringLessThanOrEqual, context, lhs, rhs);
392 break;
393 case RelationalComparisonMode::kGreaterThan:
394 TailCallRuntime(Runtime::kStringGreaterThan, context, lhs, rhs);
395 break;
396 case RelationalComparisonMode::kGreaterThanOrEqual:
397 TailCallRuntime(Runtime::kStringGreaterThanOrEqual, context, lhs,
398 rhs);
399 break;
400 }
401 }
402
403 Bind(&if_less);
404 switch (mode) {
405 case RelationalComparisonMode::kLessThan:
406 case RelationalComparisonMode::kLessThanOrEqual:
407 Return(BooleanConstant(true));
408 break;
409
410 case RelationalComparisonMode::kGreaterThan:
411 case RelationalComparisonMode::kGreaterThanOrEqual:
412 Return(BooleanConstant(false));
413 break;
414 }
415
416 Bind(&if_equal);
417 switch (mode) {
418 case RelationalComparisonMode::kLessThan:
419 case RelationalComparisonMode::kGreaterThan:
420 Return(BooleanConstant(false));
421 break;
422
423 case RelationalComparisonMode::kLessThanOrEqual:
424 case RelationalComparisonMode::kGreaterThanOrEqual:
425 Return(BooleanConstant(true));
426 break;
427 }
428
429 Bind(&if_greater);
430 switch (mode) {
431 case RelationalComparisonMode::kLessThan:
432 case RelationalComparisonMode::kLessThanOrEqual:
433 Return(BooleanConstant(false));
434 break;
435
436 case RelationalComparisonMode::kGreaterThan:
437 case RelationalComparisonMode::kGreaterThanOrEqual:
438 Return(BooleanConstant(true));
439 break;
440 }
441 }
442
// String equality: the comparison result is returned without negation.
TF_BUILTIN(StringEqual, StringBuiltinsAssembler) {
  const ResultMode mode = ResultMode::kDontNegateResult;
  GenerateStringEqual(mode);
}
446
// String inequality: the equality result is negated before it is returned.
TF_BUILTIN(StringNotEqual, StringBuiltinsAssembler) {
  const ResultMode mode = ResultMode::kNegateResult;
  GenerateStringEqual(mode);
}
450
// Relational comparison builtin for the "less than" relation.
TF_BUILTIN(StringLessThan, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode = RelationalComparisonMode::kLessThan;
  GenerateStringRelationalComparison(mode);
}
454
// Relational comparison builtin for the "less than or equal" relation.
TF_BUILTIN(StringLessThanOrEqual, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode =
      RelationalComparisonMode::kLessThanOrEqual;
  GenerateStringRelationalComparison(mode);
}
459
// Relational comparison builtin for the "greater than" relation.
TF_BUILTIN(StringGreaterThan, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode = RelationalComparisonMode::kGreaterThan;
  GenerateStringRelationalComparison(mode);
}
463
// Relational comparison builtin for the "greater than or equal" relation.
TF_BUILTIN(StringGreaterThanOrEqual, StringBuiltinsAssembler) {
  const RelationalComparisonMode mode =
      RelationalComparisonMode::kGreaterThanOrEqual;
  GenerateStringRelationalComparison(mode);
}
468
// Returns the single-character string for the character of the receiver
// (parameter 0) at the position given by parameter 1. The position is passed
// to StringCharCodeAt in INTPTR_PARAMETERS mode.
TF_BUILTIN(StringCharAt, CodeStubAssembler) {
  Node* const string = Parameter(0);
  Node* const index = Parameter(1);

  // Fetch the character code at {index} and turn it into a string.
  Node* const char_code = StringCharCodeAt(string, index, INTPTR_PARAMETERS);
  Return(StringFromCharCode(char_code));
}
480
// Returns, as a Smi, the character code of the receiver (parameter 0) at the
// position given by parameter 1. The position is passed to StringCharCodeAt
// in INTPTR_PARAMETERS mode.
TF_BUILTIN(StringCharCodeAt, CodeStubAssembler) {
  Node* const string = Parameter(0);
  Node* const index = Parameter(1);

  Node* const char_code = StringCharCodeAt(string, index, INTPTR_PARAMETERS);

  // The code is tagged as a Smi before it is returned.
  // TODO(turbofan): Allow builtins to return values untagged.
  Return(SmiFromWord32(char_code));
}
493
494 // -----------------------------------------------------------------------------
495 // ES6 section 21.1 String Objects
496
497 // ES6 section 21.1.2.1 String.fromCharCode ( ...codeUnits )
TF_BUILTIN(StringFromCharCode, CodeStubAssembler) {
  Node* const context = Parameter(BuiltinDescriptor::kContext);
  Node* const argc32 = Parameter(BuiltinDescriptor::kArgumentsCount);

  CodeStubArguments arguments(this, ChangeInt32ToIntPtr(argc32));
  // Word-sized argument count, used from here on.
  Node* const argc = arguments.GetLength();

  // Converts a tagged argument into a 16-bit code unit: truncate to a 32-bit
  // word, then mask with String::kMaxUtf16CodeUnit.
  auto to_code_unit = [this, context](Node* tagged) -> Node* {
    Node* const word32 = TruncateTaggedToWord32(context, tagged);
    return Word32And(word32, Int32Constant(String::kMaxUtf16CodeUnit));
  };

  // Split on whether exactly one argument (plus the implicit receiver) was
  // passed, i.e. whether the parent frame is not an arguments adaptor frame.
  Label single_argument(this), multiple_arguments(this);
  Branch(WordEqual(argc, IntPtrConstant(1)), &single_argument,
         &multiple_arguments);

  Bind(&single_argument);
  {
    // One argument: StringFromCharCode() performs the fast single character
    // string cache lookup for one-byte code units and otherwise creates the
    // single character string on the fly.
    Node* const unit = to_code_unit(arguments.AtIndex(0));
    arguments.PopAndReturn(StringFromCharCode(unit));
  }

  // Holds the code unit produced by the one-byte loop below; when that loop
  // bails out to {two_byte}, this is the unit that did not fit into 8 bits.
  Node* pending_unit = nullptr;
  Bind(&multiple_arguments);
  {
    Label two_byte(this);
    // Optimistically assume that every code unit fits into one byte.
    Node* const one_byte_result = AllocateSeqOneByteString(context, argc);

    // Number of characters written so far.
    Variable written(this, MachineType::PointerRepresentation());
    written.Bind(IntPtrConstant(0));

    // Store each argument as an 8-bit character; bail out to {two_byte} as
    // soon as a code unit exceeds String::kMaxOneByteCharCode.
    CodeStubAssembler::VariableList loop_vars({&written}, zone());
    auto store_one_byte = [this, &to_code_unit, &two_byte, &written,
                           &pending_unit, one_byte_result](Node* arg) {
      pending_unit = to_code_unit(arg);

      GotoIf(Int32GreaterThan(pending_unit,
                              Int32Constant(String::kMaxOneByteCharCode)),
             &two_byte);

      // The unit fits into the SeqOneByteString {one_byte_result}.
      Node* const offset = ElementOffsetFromIndex(
          written.value(), UINT8_ELEMENTS,
          CodeStubAssembler::INTPTR_PARAMETERS,
          SeqOneByteString::kHeaderSize - kHeapObjectTag);
      StoreNoWriteBarrier(MachineRepresentation::kWord8, one_byte_result,
                          offset, pending_unit);
      written.Bind(IntPtrAdd(written.value(), IntPtrConstant(1)));
    };
    arguments.ForEach(loop_vars, store_one_byte);
    arguments.PopAndReturn(one_byte_result);

    Bind(&two_byte);

    // At least one character needs a 16-bit representation, so the result is
    // rebuilt as a SeqTwoByteString.
    Node* const two_byte_result = AllocateSeqTwoByteString(context, argc);

    // Carry over the characters already stored in the 8-bit string to the
    // same positions of the 16-bit string.
    Node* const zero = IntPtrConstant(0);
    CopyStringCharacters(one_byte_result, two_byte_result, zero, zero,
                         written.value(), String::ONE_BYTE_ENCODING,
                         String::TWO_BYTE_ENCODING,
                         CodeStubAssembler::INTPTR_PARAMETERS);

    // Store the code unit that triggered the switch to 16 bits.
    Node* const pending_offset =
        ElementOffsetFromIndex(written.value(), UINT16_ELEMENTS,
                               CodeStubAssembler::INTPTR_PARAMETERS,
                               SeqTwoByteString::kHeaderSize - kHeapObjectTag);
    StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
                        pending_offset, pending_unit);
    written.Bind(IntPtrAdd(written.value(), IntPtrConstant(1)));

    // Continue with the remaining arguments from where the 8-bit loop
    // stopped, now storing every code unit as 16 bits.
    auto store_two_byte = [this, &to_code_unit, two_byte_result,
                           &written](Node* arg) {
      Node* const unit = to_code_unit(arg);

      Node* const offset = ElementOffsetFromIndex(
          written.value(), UINT16_ELEMENTS,
          CodeStubAssembler::INTPTR_PARAMETERS,
          SeqTwoByteString::kHeaderSize - kHeapObjectTag);
      StoreNoWriteBarrier(MachineRepresentation::kWord16, two_byte_result,
                          offset, unit);
      written.Bind(IntPtrAdd(written.value(), IntPtrConstant(1)));
    };
    arguments.ForEach(loop_vars, store_two_byte, written.value());

    arguments.PopAndReturn(two_byte_result);
  }
}
605
606 namespace { // for String.fromCodePoint
607
IsValidCodePoint(Isolate * isolate,Handle<Object> value)608 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
609 if (!value->IsNumber() && !Object::ToNumber(value).ToHandle(&value)) {
610 return false;
611 }
612
613 if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
614 value->Number()) {
615 return false;
616 }
617
618 if (value->Number() < 0 || value->Number() > 0x10FFFF) {
619 return false;
620 }
621
622 return true;
623 }
624
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)625 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
626 Handle<Object> value = args.at(1 + index);
627 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value, Object::ToNumber(value), -1);
628 if (!IsValidCodePoint(isolate, value)) {
629 isolate->Throw(*isolate->factory()->NewRangeError(
630 MessageTemplate::kInvalidCodePoint, value));
631 return -1;
632 }
633 return DoubleToUint32(value->Number());
634 }
635
636 } // namespace
637
638 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint) {
  HandleScope scope(isolate);
  int const length = args.length() - 1;
  if (length == 0) return isolate->heap()->empty_string();
  DCHECK_LT(0, length);

  // Phase 1: collect code points optimistically into a one-byte buffer,
  // stopping at the first one above String::kMaxOneByteCharCode.
  List<uint8_t> narrow_chars(length);
  uc32 code = 0;
  int index = 0;
  while (index < length) {
    code = NextCodePoint(isolate, args, index);
    if (code < 0) {
      return isolate->heap()->exception();
    }
    if (code > String::kMaxOneByteCharCode) {
      break;
    }
    narrow_chars.Add(code);
    ++index;
  }

  // Every code point fit into one byte.
  if (index == length) {
    RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromOneByte(
                                          narrow_chars.ToConstVector()));
  }

  // Phase 2: {code} holds the first code point that did not fit. It and all
  // remaining code points go into a two-byte buffer; code points above
  // kMaxNonSurrogateCharCode are stored as a lead/trail surrogate pair.
  List<uc16> wide_chars(length - index);

  for (;;) {
    if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
      wide_chars.Add(code);
    } else {
      wide_chars.Add(unibrow::Utf16::LeadSurrogate(code));
      wide_chars.Add(unibrow::Utf16::TrailSurrogate(code));
    }

    ++index;
    if (index == length) {
      break;
    }
    code = NextCodePoint(isolate, args, index);
    if (code < 0) {
      return isolate->heap()->exception();
    }
  }

  // Assemble the result: the one-byte prefix followed by the two-byte part.
  Handle<SeqTwoByteString> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewRawTwoByteString(narrow_chars.length() +
                                              wide_chars.length()));

  CopyChars(result->GetChars(), narrow_chars.ToConstVector().start(),
            narrow_chars.length());
  CopyChars(result->GetChars() + narrow_chars.length(),
            wide_chars.ToConstVector().start(), wide_chars.length());

  return *result;
}
698
699 // ES6 section 21.1.3.1 String.prototype.charAt ( pos )
TF_BUILTIN(StringPrototypeCharAt, CodeStubAssembler) {
  Node* const context = Parameter(4);
  Node* const maybe_receiver = Parameter(0);
  Node* const maybe_position = Parameter(1);

  // The receiver must be coercible to Object; it is converted to a String.
  Node* const string =
      ToThisString(context, maybe_receiver, "String.prototype.charAt");

  // Convert the position to an integer. Anything that is not a Smi, or is
  // outside the bounds of {string}, yields the empty string.
  Node* const position =
      ToInteger(context, maybe_position, CodeStubAssembler::kTruncateMinusZero);
  {
    Label out_of_bounds(this, Label::kDeferred);
    Label in_bounds(this);

    GotoIfNot(TaggedIsSmi(position), &out_of_bounds);

    // Length of the {string}, as stored in its length field.
    Node* const string_length = LoadObjectField(string, String::kLengthOffset);

    Branch(SmiAboveOrEqual(position, string_length), &out_of_bounds,
           &in_bounds);

    Bind(&out_of_bounds);
    Return(EmptyStringConstant());

    Bind(&in_bounds);
  }

  // Fetch the character code at {position} and wrap it into a
  // single-character string.
  Node* const char_code = StringCharCodeAt(string, position);
  Return(StringFromCharCode(char_code));
}
737
738 // ES6 section 21.1.3.2 String.prototype.charCodeAt ( pos )
TF_BUILTIN(StringPrototypeCharCodeAt, CodeStubAssembler) {
  Node* const context = Parameter(4);
  Node* const maybe_receiver = Parameter(0);
  Node* const maybe_position = Parameter(1);

  // The receiver must be coercible to Object; it is converted to a String.
  Node* const string =
      ToThisString(context, maybe_receiver, "String.prototype.charCodeAt");

  // Convert the position to an integer. Anything that is not a Smi, or is
  // outside the bounds of {string}, yields NaN.
  Node* const position =
      ToInteger(context, maybe_position, CodeStubAssembler::kTruncateMinusZero);
  {
    Label out_of_bounds(this, Label::kDeferred);
    Label in_bounds(this);

    GotoIfNot(TaggedIsSmi(position), &out_of_bounds);

    // Length of the {string}, as stored in its length field.
    Node* const string_length = LoadObjectField(string, String::kLengthOffset);

    Branch(SmiAboveOrEqual(position, string_length), &out_of_bounds,
           &in_bounds);

    Bind(&out_of_bounds);
    Return(NaNConstant());

    Bind(&in_bounds);
  }

  // Fetch the character code at {position} and return it as a Smi.
  Node* const char_code = StringCharCodeAt(string, position);
  Return(SmiFromWord32(char_code));
}
774
775 // ES6 section 21.1.3.6
776 // String.prototype.endsWith ( searchString [ , endPosition ] )
BUILTIN(StringPrototypeEndsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(str, "String.prototype.endsWith");

  // A RegExp as search argument is rejected with a TypeError.
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Maybe<bool> regexp_check = RegExpUtils::IsRegExp(isolate, search);
  if (regexp_check.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (regexp_check.FromJust()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
                              isolate->factory()->NewStringFromStaticChars(
                                  "String.prototype.endsWith")));
  }
  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  // The end position defaults to the length of the receiver; otherwise it is
  // the integer value of the second argument mapped to a valid index.
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  int end;
  if (!position->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    end = str->ToValidIndex(*position);
  } else {
    end = str->length();
  }

  // The search string cannot match if it is longer than the prefix ending at
  // {end}.
  const int search_length = search_string->length();
  const int start = end - search_length;
  if (start < 0) return isolate->heap()->false_value();

  str = String::Flatten(str);
  search_string = String::Flatten(search_string);

  DisallowHeapAllocation no_gc;  // ensure vectors stay valid
  String::FlatContent str_content = str->GetFlatContent();
  String::FlatContent search_content = search_string->GetFlatContent();

  // Both one-byte: compare the raw bytes directly.
  if (str_content.IsOneByte() && search_content.IsOneByte()) {
    Vector<const uint8_t> str_bytes = str_content.ToOneByteVector();
    Vector<const uint8_t> search_bytes = search_content.ToOneByteVector();

    const bool is_suffix = memcmp(str_bytes.start() + start,
                                  search_bytes.start(), search_length) == 0;
    return isolate->heap()->ToBoolean(is_suffix);
  }

  // Otherwise compare character by character through flat string readers.
  FlatStringReader str_reader(isolate, str);
  FlatStringReader search_reader(isolate, search_string);

  for (int offset = 0; offset < search_length; ++offset) {
    if (str_reader.Get(start + offset) != search_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
838
839 // ES6 section 21.1.3.7
840 // String.prototype.includes ( searchString [ , position ] )
BUILTIN(StringPrototypeIncludes) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(str, "String.prototype.includes");

  // A RegExp as search argument is rejected with a TypeError.
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Maybe<bool> regexp_check = RegExpUtils::IsRegExp(isolate, search);
  if (regexp_check.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (regexp_check.FromJust()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
                              isolate->factory()->NewStringFromStaticChars(
                                  "String.prototype.includes")));
  }
  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  // The start position is the integer value of the second argument, mapped
  // to a valid index of the receiver.
  Handle<Object> raw_position = args.atOrUndefined(isolate, 2);
  Handle<Object> position;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                     Object::ToInteger(isolate, raw_position));

  uint32_t start_index = str->ToValidIndex(*position);
  int match_index = String::IndexOf(isolate, str, search_string, start_index);
  const bool found = match_index != -1;
  return *isolate->factory()->ToBoolean(found);
}
870
StringIndexOf(Node * receiver,Node * instance_type,Node * search_string,Node * search_string_instance_type,Node * position,std::function<void (Node *)> f_return)871 void StringBuiltinsAssembler::StringIndexOf(
872 Node* receiver, Node* instance_type, Node* search_string,
873 Node* search_string_instance_type, Node* position,
874 std::function<void(Node*)> f_return) {
875 CSA_ASSERT(this, IsString(receiver));
876 CSA_ASSERT(this, IsString(search_string));
877 CSA_ASSERT(this, TaggedIsSmi(position));
878
879 Label zero_length_needle(this),
880 call_runtime_unchecked(this, Label::kDeferred), return_minus_1(this),
881 check_search_string(this), continue_fast_path(this);
882
883 Node* const int_zero = IntPtrConstant(0);
884 Variable var_needle_byte(this, MachineType::PointerRepresentation(),
885 int_zero);
886 Variable var_string_addr(this, MachineType::PointerRepresentation(),
887 int_zero);
888
889 Node* needle_length = SmiUntag(LoadStringLength(search_string));
890 // Use faster/complex runtime fallback for long search strings.
891 GotoIf(IntPtrLessThan(IntPtrConstant(1), needle_length),
892 &call_runtime_unchecked);
893 Node* string_length = SmiUntag(LoadStringLength(receiver));
894 Node* start_position = IntPtrMax(SmiUntag(position), int_zero);
895
896 GotoIf(IntPtrEqual(int_zero, needle_length), &zero_length_needle);
897 // Check that the needle fits in the start position.
898 GotoIfNot(IntPtrLessThanOrEqual(needle_length,
899 IntPtrSub(string_length, start_position)),
900 &return_minus_1);
901
902 // Load the string address.
903 {
904 Label if_onebyte_sequential(this);
905 Label if_onebyte_external(this, Label::kDeferred);
906
907 // Only support one-byte strings on the fast path.
908 DispatchOnStringInstanceType(instance_type, &if_onebyte_sequential,
909 &if_onebyte_external, &call_runtime_unchecked);
910
911 Bind(&if_onebyte_sequential);
912 {
913 var_string_addr.Bind(
914 OneByteCharAddress(BitcastTaggedToWord(receiver), start_position));
915 Goto(&check_search_string);
916 }
917
918 Bind(&if_onebyte_external);
919 {
920 Node* const unpacked = TryDerefExternalString(receiver, instance_type,
921 &call_runtime_unchecked);
922 var_string_addr.Bind(OneByteCharAddress(unpacked, start_position));
923 Goto(&check_search_string);
924 }
925 }
926
927 // Load the needle character.
928 Bind(&check_search_string);
929 {
930 Label if_onebyte_sequential(this);
931 Label if_onebyte_external(this, Label::kDeferred);
932
933 DispatchOnStringInstanceType(search_string_instance_type,
934 &if_onebyte_sequential, &if_onebyte_external,
935 &call_runtime_unchecked);
936
937 Bind(&if_onebyte_sequential);
938 {
939 var_needle_byte.Bind(
940 ChangeInt32ToIntPtr(LoadOneByteChar(search_string, int_zero)));
941 Goto(&continue_fast_path);
942 }
943
944 Bind(&if_onebyte_external);
945 {
946 Node* const unpacked = TryDerefExternalString(
947 search_string, search_string_instance_type, &call_runtime_unchecked);
948 var_needle_byte.Bind(
949 ChangeInt32ToIntPtr(LoadOneByteChar(unpacked, int_zero)));
950 Goto(&continue_fast_path);
951 }
952 }
953
954 Bind(&continue_fast_path);
955 {
956 Node* needle_byte = var_needle_byte.value();
957 Node* string_addr = var_string_addr.value();
958 Node* search_length = IntPtrSub(string_length, start_position);
959 // Call out to the highly optimized memchr to perform the actual byte
960 // search.
961 Node* memchr =
962 ExternalConstant(ExternalReference::libc_memchr_function(isolate()));
963 Node* result_address =
964 CallCFunction3(MachineType::Pointer(), MachineType::Pointer(),
965 MachineType::IntPtr(), MachineType::UintPtr(), memchr,
966 string_addr, needle_byte, search_length);
967 GotoIf(WordEqual(result_address, int_zero), &return_minus_1);
968 Node* result_index =
969 IntPtrAdd(IntPtrSub(result_address, string_addr), start_position);
970 f_return(SmiTag(result_index));
971 }
972
973 Bind(&return_minus_1);
974 f_return(SmiConstant(-1));
975
976 Bind(&zero_length_needle);
977 {
978 Comment("0-length search_string");
979 f_return(SmiTag(IntPtrMin(string_length, start_position)));
980 }
981
982 Bind(&call_runtime_unchecked);
983 {
984 // Simplified version of the runtime call where the types of the arguments
985 // are already known due to type checks in this stub.
986 Comment("Call Runtime Unchecked");
987 Node* result = CallRuntime(Runtime::kStringIndexOfUnchecked, SmiConstant(0),
988 receiver, search_string, position);
989 f_return(result);
990 }
991 }
992
993 // ES6 String.prototype.indexOf(searchString [, position])
994 // #sec-string.prototype.indexof
995 // Unchecked helper for builtins lowering.
TF_BUILTIN(StringIndexOf,StringBuiltinsAssembler)996 TF_BUILTIN(StringIndexOf, StringBuiltinsAssembler) {
997 Node* receiver = Parameter(0);
998 Node* search_string = Parameter(1);
999 Node* position = Parameter(2);
1000
1001 Node* instance_type = LoadInstanceType(receiver);
1002 Node* search_string_instance_type = LoadInstanceType(search_string);
1003
1004 StringIndexOf(receiver, instance_type, search_string,
1005 search_string_instance_type, position,
1006 [this](Node* result) { this->Return(result); });
1007 }
1008
1009 // ES6 String.prototype.indexOf(searchString [, position])
1010 // #sec-string.prototype.indexof
TF_BUILTIN(StringPrototypeIndexOf,StringBuiltinsAssembler)1011 TF_BUILTIN(StringPrototypeIndexOf, StringBuiltinsAssembler) {
1012 Variable search_string(this, MachineRepresentation::kTagged),
1013 position(this, MachineRepresentation::kTagged);
1014 Label call_runtime(this), call_runtime_unchecked(this), argc_0(this),
1015 no_argc_0(this), argc_1(this), no_argc_1(this), argc_2(this),
1016 fast_path(this), return_minus_1(this);
1017
1018 Node* argc = Parameter(BuiltinDescriptor::kArgumentsCount);
1019 Node* context = Parameter(BuiltinDescriptor::kContext);
1020
1021 CodeStubArguments arguments(this, ChangeInt32ToIntPtr(argc));
1022 Node* receiver = arguments.GetReceiver();
1023 // From now on use word-size argc value.
1024 argc = arguments.GetLength();
1025
1026 GotoIf(IntPtrEqual(argc, IntPtrConstant(0)), &argc_0);
1027 GotoIf(IntPtrEqual(argc, IntPtrConstant(1)), &argc_1);
1028 Goto(&argc_2);
1029 Bind(&argc_0);
1030 {
1031 Comment("0 Argument case");
1032 Node* undefined = UndefinedConstant();
1033 search_string.Bind(undefined);
1034 position.Bind(undefined);
1035 Goto(&call_runtime);
1036 }
1037 Bind(&argc_1);
1038 {
1039 Comment("1 Argument case");
1040 search_string.Bind(arguments.AtIndex(0));
1041 position.Bind(SmiConstant(0));
1042 Goto(&fast_path);
1043 }
1044 Bind(&argc_2);
1045 {
1046 Comment("2 Argument case");
1047 search_string.Bind(arguments.AtIndex(0));
1048 position.Bind(arguments.AtIndex(1));
1049 GotoIfNot(TaggedIsSmi(position.value()), &call_runtime);
1050 Goto(&fast_path);
1051 }
1052
1053 Bind(&fast_path);
1054 {
1055 Comment("Fast Path");
1056 GotoIf(TaggedIsSmi(receiver), &call_runtime);
1057 Node* needle = search_string.value();
1058 GotoIf(TaggedIsSmi(needle), &call_runtime);
1059
1060 Node* instance_type = LoadInstanceType(receiver);
1061 GotoIfNot(IsStringInstanceType(instance_type), &call_runtime);
1062
1063 Node* needle_instance_type = LoadInstanceType(needle);
1064 GotoIfNot(IsStringInstanceType(needle_instance_type), &call_runtime);
1065
1066 StringIndexOf(
1067 receiver, instance_type, needle, needle_instance_type, position.value(),
1068 [&arguments](Node* result) { arguments.PopAndReturn(result); });
1069 }
1070
1071 Bind(&call_runtime);
1072 {
1073 Comment("Call Runtime");
1074 Node* result = CallRuntime(Runtime::kStringIndexOf, context, receiver,
1075 search_string.value(), position.value());
1076 arguments.PopAndReturn(result);
1077 }
1078 }
1079
1080 // ES6 section 21.1.3.9
1081 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf)1082 BUILTIN(StringPrototypeLastIndexOf) {
1083 HandleScope handle_scope(isolate);
1084 return String::LastIndexOf(isolate, args.receiver(),
1085 args.atOrUndefined(isolate, 1),
1086 args.atOrUndefined(isolate, 2));
1087 }
1088
1089 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
1090 //
1091 // This function is implementation specific. For now, we do not
1092 // do anything locale specific.
1093 // If internationalization is enabled, then i18n.js will override this function
1094 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeLocaleCompare)1095 BUILTIN(StringPrototypeLocaleCompare) {
1096 HandleScope handle_scope(isolate);
1097 DCHECK_EQ(2, args.length());
1098
1099 TO_THIS_STRING(str1, "String.prototype.localeCompare");
1100 Handle<String> str2;
1101 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
1102 Object::ToString(isolate, args.at(1)));
1103
1104 if (str1.is_identical_to(str2)) return Smi::kZero; // Equal.
1105 int str1_length = str1->length();
1106 int str2_length = str2->length();
1107
1108 // Decide trivial cases without flattening.
1109 if (str1_length == 0) {
1110 if (str2_length == 0) return Smi::kZero; // Equal.
1111 return Smi::FromInt(-str2_length);
1112 } else {
1113 if (str2_length == 0) return Smi::FromInt(str1_length);
1114 }
1115
1116 int end = str1_length < str2_length ? str1_length : str2_length;
1117
1118 // No need to flatten if we are going to find the answer on the first
1119 // character. At this point we know there is at least one character
1120 // in each string, due to the trivial case handling above.
1121 int d = str1->Get(0) - str2->Get(0);
1122 if (d != 0) return Smi::FromInt(d);
1123
1124 str1 = String::Flatten(str1);
1125 str2 = String::Flatten(str2);
1126
1127 DisallowHeapAllocation no_gc;
1128 String::FlatContent flat1 = str1->GetFlatContent();
1129 String::FlatContent flat2 = str2->GetFlatContent();
1130
1131 for (int i = 0; i < end; i++) {
1132 if (flat1.Get(i) != flat2.Get(i)) {
1133 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
1134 }
1135 }
1136
1137 return Smi::FromInt(str1_length - str2_length);
1138 }
1139
1140 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
1141 //
1142 // Simply checks the argument is valid and returns the string itself.
1143 // If internationalization is enabled, then i18n.js will override this function
1144 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize)1145 BUILTIN(StringPrototypeNormalize) {
1146 HandleScope handle_scope(isolate);
1147 TO_THIS_STRING(string, "String.prototype.normalize");
1148
1149 Handle<Object> form_input = args.atOrUndefined(isolate, 1);
1150 if (form_input->IsUndefined(isolate)) return *string;
1151
1152 Handle<String> form;
1153 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
1154 Object::ToString(isolate, form_input));
1155
1156 if (!(String::Equals(form,
1157 isolate->factory()->NewStringFromStaticChars("NFC")) ||
1158 String::Equals(form,
1159 isolate->factory()->NewStringFromStaticChars("NFD")) ||
1160 String::Equals(form,
1161 isolate->factory()->NewStringFromStaticChars("NFKC")) ||
1162 String::Equals(form,
1163 isolate->factory()->NewStringFromStaticChars("NFKD")))) {
1164 Handle<String> valid_forms =
1165 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1166 THROW_NEW_ERROR_RETURN_FAILURE(
1167 isolate,
1168 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
1169 }
1170
1171 return *string;
1172 }
1173
IsNullOrUndefined(Node * const value)1174 compiler::Node* StringBuiltinsAssembler::IsNullOrUndefined(Node* const value) {
1175 return Word32Or(IsUndefined(value), IsNull(value));
1176 }
1177
RequireObjectCoercible(Node * const context,Node * const value,const char * method_name)1178 void StringBuiltinsAssembler::RequireObjectCoercible(Node* const context,
1179 Node* const value,
1180 const char* method_name) {
1181 Label out(this), throw_exception(this, Label::kDeferred);
1182 Branch(IsNullOrUndefined(value), &throw_exception, &out);
1183
1184 Bind(&throw_exception);
1185 TailCallRuntime(
1186 Runtime::kThrowCalledOnNullOrUndefined, context,
1187 HeapConstant(factory()->NewStringFromAsciiChecked(method_name, TENURED)));
1188
1189 Bind(&out);
1190 }
1191
MaybeCallFunctionAtSymbol(Node * const context,Node * const object,Node * const maybe_string,Handle<Symbol> symbol,const NodeFunction0 & regexp_call,const NodeFunction1 & generic_call)1192 void StringBuiltinsAssembler::MaybeCallFunctionAtSymbol(
1193 Node* const context, Node* const object, Node* const maybe_string,
1194 Handle<Symbol> symbol, const NodeFunction0& regexp_call,
1195 const NodeFunction1& generic_call) {
1196 Label out(this);
1197
1198 // Smis definitely don't have an attached symbol.
1199 GotoIf(TaggedIsSmi(object), &out);
1200
1201 Node* const object_map = LoadMap(object);
1202
1203 // Skip the slow lookup for Strings.
1204 {
1205 Label next(this);
1206
1207 GotoIfNot(IsStringInstanceType(LoadMapInstanceType(object_map)), &next);
1208
1209 Node* const native_context = LoadNativeContext(context);
1210 Node* const initial_proto_initial_map = LoadContextElement(
1211 native_context, Context::STRING_FUNCTION_PROTOTYPE_MAP_INDEX);
1212
1213 Node* const string_fun =
1214 LoadContextElement(native_context, Context::STRING_FUNCTION_INDEX);
1215 Node* const initial_map =
1216 LoadObjectField(string_fun, JSFunction::kPrototypeOrInitialMapOffset);
1217 Node* const proto_map = LoadMap(LoadMapPrototype(initial_map));
1218
1219 Branch(WordEqual(proto_map, initial_proto_initial_map), &out, &next);
1220
1221 Bind(&next);
1222 }
1223
1224 // Take the fast path for RegExps.
1225 // There's two conditions: {object} needs to be a fast regexp, and
1226 // {maybe_string} must be a string (we can't call ToString on the fast path
1227 // since it may mutate {object}).
1228 {
1229 Label stub_call(this), slow_lookup(this);
1230
1231 GotoIf(TaggedIsSmi(maybe_string), &slow_lookup);
1232 GotoIfNot(IsString(maybe_string), &slow_lookup);
1233
1234 RegExpBuiltinsAssembler regexp_asm(state());
1235 regexp_asm.BranchIfFastRegExp(context, object, object_map, &stub_call,
1236 &slow_lookup);
1237
1238 Bind(&stub_call);
1239 Return(regexp_call());
1240
1241 Bind(&slow_lookup);
1242 }
1243
1244 GotoIf(IsNullOrUndefined(object), &out);
1245
1246 // Fall back to a slow lookup of {object[symbol]}.
1247
1248 Callable getproperty_callable = CodeFactory::GetProperty(isolate());
1249 Node* const key = HeapConstant(symbol);
1250 Node* const maybe_func = CallStub(getproperty_callable, context, object, key);
1251
1252 GotoIf(IsUndefined(maybe_func), &out);
1253
1254 // Attempt to call the function.
1255
1256 Node* const result = generic_call(maybe_func);
1257 Return(result);
1258
1259 Bind(&out);
1260 }
1261
// ES6 section 21.1.3.16 String.prototype.replace ( search, replace )
//
// Replaces the first occurrence of {search} (as a string) in the receiver.
// Outline of the emitted code:
//   1. Throw if the receiver is null or undefined.
//   2. Delegate to {search[@@replace]} (or the RegExp replace stub) when
//      MaybeCallFunctionAtSymbol decides to do so.
//   3. Convert receiver and search value to strings.
//   4. Use the runtime one-char replacement for a single-character search
//      string, a subject longer than 0xFF and a string {replace} without '$'.
//   5. Otherwise locate the first match with the StringIndexOf stub and
//      build prefix + replacement + suffix.
TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
  Label out(this);

  Node* const receiver = Parameter(0);
  Node* const search = Parameter(1);
  Node* const replace = Parameter(2);
  Node* const context = Parameter(5);

  Node* const smi_zero = SmiConstant(0);

  RequireObjectCoercible(context, receiver, "String.prototype.replace");

  // Redirect to replacer method if {search[@@replace]} is not undefined.

  MaybeCallFunctionAtSymbol(
      context, search, receiver, isolate()->factory()->replace_symbol(),
      // Fast RegExp case: convert the receiver to a string and call the
      // RegExpReplace stub.
      [=]() {
        Callable tostring_callable = CodeFactory::ToString(isolate());
        Node* const subject_string =
            CallStub(tostring_callable, context, receiver);

        Callable replace_callable = CodeFactory::RegExpReplace(isolate());
        return CallStub(replace_callable, context, search, subject_string,
                        replace);
      },
      // Generic case: call the looked-up function with {search} as receiver
      // and (receiver, replace) as arguments.
      [=](Node* fn) {
        Callable call_callable = CodeFactory::Call(isolate());
        return CallJS(call_callable, context, fn, search, receiver, replace);
      });

  // Convert {receiver} and {search} to strings.

  Callable tostring_callable = CodeFactory::ToString(isolate());
  Callable indexof_callable = CodeFactory::StringIndexOf(isolate());

  Node* const subject_string = CallStub(tostring_callable, context, receiver);
  Node* const search_string = CallStub(tostring_callable, context, search);

  Node* const subject_length = LoadStringLength(subject_string);
  Node* const search_length = LoadStringLength(search_string);

  // Fast-path single-char {search}, long {receiver}, and simple string
  // {replace}.
  {
    Label next(this);

    GotoIfNot(SmiEqual(search_length, SmiConstant(1)), &next);
    GotoIfNot(SmiGreaterThan(subject_length, SmiConstant(0xFF)), &next);
    GotoIf(TaggedIsSmi(replace), &next);
    GotoIfNot(IsString(replace), &next);

    // {replace} only counts as simple if it contains no '$', i.e. the index
    // returned for '$' is negative.
    Node* const dollar_string = HeapConstant(
        isolate()->factory()->LookupSingleCharacterStringFromCode('$'));
    Node* const dollar_ix =
        CallStub(indexof_callable, context, replace, dollar_string, smi_zero);
    GotoIfNot(SmiIsNegative(dollar_ix), &next);

    // Searching by traversing a cons string tree and replace with cons of
    // slices works only when the replaced string is a single character, being
    // replaced by a simple string and only pays off for long strings.
    // TODO(jgruber): Reevaluate if this is still beneficial.
    // TODO(jgruber): TailCallRuntime when it correctly handles adapter frames.
    Return(CallRuntime(Runtime::kStringReplaceOneCharWithString, context,
                       subject_string, search_string, replace));

    Bind(&next);
  }

  // TODO(jgruber): Extend StringIndexOf to handle two-byte strings and
  // longer substrings - we can handle up to 8 chars (one-byte) / 4 chars
  // (2-byte).

  // Find the first occurrence of {search_string}, starting at index 0.
  Node* const match_start_index = CallStub(
      indexof_callable, context, subject_string, search_string, smi_zero);
  CSA_ASSERT(this, TaggedIsSmi(match_start_index));

  // Early exit if no match found.
  {
    Label next(this), return_subject(this);

    GotoIfNot(SmiIsNegative(match_start_index), &next);

    // The spec requires to perform ToString(replace) if the {replace} is not
    // callable even if we are going to exit here.
    // Since ToString() being applied to Smi does not have side effects for
    // numbers we can skip it.
    GotoIf(TaggedIsSmi(replace), &return_subject);
    GotoIf(IsCallableMap(LoadMap(replace)), &return_subject);

    // TODO(jgruber): Could introduce ToStringSideeffectsStub which only
    // performs observable parts of ToString.
    // The result of this conversion is intentionally discarded.
    CallStub(tostring_callable, context, replace);
    Goto(&return_subject);

    Bind(&return_subject);
    Return(subject_string);

    Bind(&next);
  }

  Node* const match_end_index = SmiAdd(match_start_index, search_length);

  Callable substring_callable = CodeFactory::SubString(isolate());
  Callable stringadd_callable =
      CodeFactory::StringAdd(isolate(), STRING_ADD_CHECK_NONE, NOT_TENURED);

  // Accumulates the result; starts out as the empty string and first
  // receives the prefix (if any), then prefix + replacement.
  Variable var_result(this, MachineRepresentation::kTagged,
                      EmptyStringConstant());

  // Compute the prefix.
  {
    Label next(this);

    // A match at index 0 has no prefix; keep the empty string.
    GotoIf(SmiEqual(match_start_index, smi_zero), &next);
    Node* const prefix = CallStub(substring_callable, context, subject_string,
                                  smi_zero, match_start_index);
    var_result.Bind(prefix);

    Goto(&next);
    Bind(&next);
  }

  // Compute the string to replace with.

  Label if_iscallablereplace(this), if_notcallablereplace(this);
  GotoIf(TaggedIsSmi(replace), &if_notcallablereplace);
  Branch(IsCallableMap(LoadMap(replace)), &if_iscallablereplace,
         &if_notcallablereplace);

  Bind(&if_iscallablereplace);
  {
    // Call {replace} with an undefined receiver and the arguments
    // (search_string, match_start_index, subject_string), then convert the
    // call result to a string.
    Callable call_callable = CodeFactory::Call(isolate());
    Node* const replacement =
        CallJS(call_callable, context, replace, UndefinedConstant(),
               search_string, match_start_index, subject_string);
    Node* const replacement_string =
        CallStub(tostring_callable, context, replacement);
    var_result.Bind(CallStub(stringadd_callable, context, var_result.value(),
                             replacement_string));
    Goto(&out);
  }

  Bind(&if_notcallablereplace);
  {
    // Convert {replace} to a string and let the runtime compute the
    // substitution for the matched substring.
    Node* const replace_string = CallStub(tostring_callable, context, replace);

    // TODO(jgruber): Simplified GetSubstitution implementation in CSA.
    Node* const matched = CallStub(substring_callable, context, subject_string,
                                   match_start_index, match_end_index);
    Node* const replacement_string =
        CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
                    match_start_index, replace_string);
    var_result.Bind(CallStub(stringadd_callable, context, var_result.value(),
                             replacement_string));
    Goto(&out);
  }

  // Append everything after the match and return the combined string.
  Bind(&out);
  {
    Node* const suffix = CallStub(substring_callable, context, subject_string,
                                  match_end_index, subject_length);
    Node* const result =
        CallStub(stringadd_callable, context, var_result.value(), suffix);
    Return(result);
  }
}
1429
1430 // ES6 section 21.1.3.19 String.prototype.split ( separator, limit )
TF_BUILTIN(StringPrototypeSplit,StringBuiltinsAssembler)1431 TF_BUILTIN(StringPrototypeSplit, StringBuiltinsAssembler) {
1432 Label out(this);
1433
1434 Node* const receiver = Parameter(0);
1435 Node* const separator = Parameter(1);
1436 Node* const limit = Parameter(2);
1437 Node* const context = Parameter(5);
1438
1439 Node* const smi_zero = SmiConstant(0);
1440
1441 RequireObjectCoercible(context, receiver, "String.prototype.split");
1442
1443 // Redirect to splitter method if {separator[@@split]} is not undefined.
1444
1445 MaybeCallFunctionAtSymbol(
1446 context, separator, receiver, isolate()->factory()->split_symbol(),
1447 [=]() {
1448 Callable tostring_callable = CodeFactory::ToString(isolate());
1449 Node* const subject_string =
1450 CallStub(tostring_callable, context, receiver);
1451
1452 Callable split_callable = CodeFactory::RegExpSplit(isolate());
1453 return CallStub(split_callable, context, separator, subject_string,
1454 limit);
1455 },
1456 [=](Node* fn) {
1457 Callable call_callable = CodeFactory::Call(isolate());
1458 return CallJS(call_callable, context, fn, separator, receiver, limit);
1459 });
1460
1461 // String and integer conversions.
1462 // TODO(jgruber): The old implementation used Uint32Max instead of SmiMax -
1463 // but AFAIK there should not be a difference since arrays are capped at Smi
1464 // lengths.
1465
1466 Callable tostring_callable = CodeFactory::ToString(isolate());
1467 Node* const subject_string = CallStub(tostring_callable, context, receiver);
1468 Node* const limit_number =
1469 Select(IsUndefined(limit), [=]() { return SmiConstant(Smi::kMaxValue); },
1470 [=]() { return ToUint32(context, limit); },
1471 MachineRepresentation::kTagged);
1472 Node* const separator_string =
1473 CallStub(tostring_callable, context, separator);
1474
1475 // Shortcut for {limit} == 0.
1476 {
1477 Label next(this);
1478 GotoIfNot(SmiEqual(limit_number, smi_zero), &next);
1479
1480 const ElementsKind kind = FAST_ELEMENTS;
1481 Node* const native_context = LoadNativeContext(context);
1482 Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
1483
1484 Node* const length = smi_zero;
1485 Node* const capacity = IntPtrConstant(0);
1486 Node* const result = AllocateJSArray(kind, array_map, capacity, length);
1487
1488 Return(result);
1489
1490 Bind(&next);
1491 }
1492
1493 // ECMA-262 says that if {separator} is undefined, the result should
1494 // be an array of size 1 containing the entire string.
1495 {
1496 Label next(this);
1497 GotoIfNot(IsUndefined(separator), &next);
1498
1499 const ElementsKind kind = FAST_ELEMENTS;
1500 Node* const native_context = LoadNativeContext(context);
1501 Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
1502
1503 Node* const length = SmiConstant(1);
1504 Node* const capacity = IntPtrConstant(1);
1505 Node* const result = AllocateJSArray(kind, array_map, capacity, length);
1506
1507 Node* const fixed_array = LoadElements(result);
1508 StoreFixedArrayElement(fixed_array, 0, subject_string);
1509
1510 Return(result);
1511
1512 Bind(&next);
1513 }
1514
1515 // If the separator string is empty then return the elements in the subject.
1516 {
1517 Label next(this);
1518 GotoIfNot(SmiEqual(LoadStringLength(separator_string), smi_zero), &next);
1519
1520 Node* const result = CallRuntime(Runtime::kStringToArray, context,
1521 subject_string, limit_number);
1522 Return(result);
1523
1524 Bind(&next);
1525 }
1526
1527 Node* const result =
1528 CallRuntime(Runtime::kStringSplit, context, subject_string,
1529 separator_string, limit_number);
1530 Return(result);
1531 }
1532
// ES6 section B.2.3.1 String.prototype.substr ( start, length )
//
// Converts the receiver to a string, normalizes {start} and {length} into
// Smi values held in {var_start} and {var_length}, and returns
// SubString(string, start, start + length). The empty string is returned
// directly whenever the normalized length is not positive.
TF_BUILTIN(StringPrototypeSubstr, CodeStubAssembler) {
  Label out(this), handle_length(this);

  Variable var_start(this, MachineRepresentation::kTagged);
  Variable var_length(this, MachineRepresentation::kTagged);

  Node* const receiver = Parameter(0);
  Node* const start = Parameter(1);
  Node* const length = Parameter(2);
  Node* const context = Parameter(5);

  Node* const zero = SmiConstant(Smi::kZero);

  // Check that {receiver} is coercible to Object and convert it to a String.
  Node* const string =
      ToThisString(context, receiver, "String.prototype.substr");

  Node* const string_length = LoadStringLength(string);

  // Conversions and bounds-checks for {start}.
  {
    Node* const start_int =
        ToInteger(context, start, CodeStubAssembler::kTruncateMinusZero);

    Label if_issmi(this), if_isheapnumber(this, Label::kDeferred);
    Branch(TaggedIsSmi(start_int), &if_issmi, &if_isheapnumber);

    Bind(&if_issmi);
    {
      // A negative {start} counts from the end of the string and is clamped
      // at zero; a non-negative {start} is used unchanged.
      Node* const length_plus_start = SmiAdd(string_length, start_int);
      var_start.Bind(Select(SmiLessThan(start_int, zero),
                            [&] { return SmiMax(length_plus_start, zero); },
                            [&] { return start_int; },
                            MachineRepresentation::kTagged));
      Goto(&handle_length);
    }

    Bind(&if_isheapnumber);
    {
      // If {start} is a heap number, it is definitely out of bounds. If it is
      // negative, {start} = max({string_length} + {start}, 0) = 0. If it is
      // positive, set {start} to {string_length} which ultimately results in
      // returning an empty string.
      Node* const float_zero = Float64Constant(0.);
      Node* const start_float = LoadHeapNumberValue(start_int);
      var_start.Bind(SelectTaggedConstant(
          Float64LessThan(start_float, float_zero), zero, string_length));
      Goto(&handle_length);
    }
  }

  // Conversions and bounds-checks for {length}.
  Bind(&handle_length);
  {
    Label if_issmi(this), if_isheapnumber(this, Label::kDeferred);

    // Default to {string_length} if {length} is undefined.
    {
      Label if_isundefined(this, Label::kDeferred), if_isnotundefined(this);
      Branch(WordEqual(length, UndefinedConstant()), &if_isundefined,
             &if_isnotundefined);

      Bind(&if_isundefined);
      var_length.Bind(string_length);
      Goto(&if_issmi);

      // No jump follows this binding: control continues with the
      // Smi/heap-number branch right below.
      Bind(&if_isnotundefined);
      var_length.Bind(
          ToInteger(context, length, CodeStubAssembler::kTruncateMinusZero));
    }

    Branch(TaggedIsSmi(var_length.value()), &if_issmi, &if_isheapnumber);

    // Set {length} to min(max({length}, 0), {string_length} - {start}).
    Bind(&if_issmi);
    {
      Node* const positive_length = SmiMax(var_length.value(), zero);

      Node* const minimal_length = SmiSub(string_length, var_start.value());
      var_length.Bind(SmiMin(positive_length, minimal_length));

      // A non-positive length yields the empty string.
      GotoIfNot(SmiLessThanOrEqual(var_length.value(), zero), &out);
      Return(EmptyStringConstant());
    }

    Bind(&if_isheapnumber);
    {
      // If {length} is a heap number, it is definitely out of bounds. There are
      // two cases according to the spec: if it is negative, "" is returned; if
      // it is positive, then length is set to {string_length} - {start}.

      CSA_ASSERT(this, IsHeapNumberMap(LoadMap(var_length.value())));

      Label if_isnegative(this), if_ispositive(this);
      Node* const float_zero = Float64Constant(0.);
      Node* const length_float = LoadHeapNumberValue(var_length.value());
      Branch(Float64LessThan(length_float, float_zero), &if_isnegative,
             &if_ispositive);

      Bind(&if_isnegative);
      Return(EmptyStringConstant());

      Bind(&if_ispositive);
      {
        var_length.Bind(SmiSub(string_length, var_start.value()));
        // A non-positive remaining length yields the empty string.
        GotoIfNot(SmiLessThanOrEqual(var_length.value(), zero), &out);
        Return(EmptyStringConstant());
      }
    }
  }

  // Extract [start, start + length) from the string.
  Bind(&out);
  {
    Node* const end = SmiAdd(var_start.value(), var_length.value());
    Node* const result = SubString(context, string, var_start.value(), end);
    Return(result);
  }
}
1652
ToSmiBetweenZeroAnd(Node * context,Node * value,Node * limit)1653 compiler::Node* StringBuiltinsAssembler::ToSmiBetweenZeroAnd(Node* context,
1654 Node* value,
1655 Node* limit) {
1656 Label out(this);
1657 Variable var_result(this, MachineRepresentation::kTagged);
1658
1659 Node* const value_int =
1660 this->ToInteger(context, value, CodeStubAssembler::kTruncateMinusZero);
1661
1662 Label if_issmi(this), if_isnotsmi(this, Label::kDeferred);
1663 Branch(TaggedIsSmi(value_int), &if_issmi, &if_isnotsmi);
1664
1665 Bind(&if_issmi);
1666 {
1667 Label if_isinbounds(this), if_isoutofbounds(this, Label::kDeferred);
1668 Branch(SmiAbove(value_int, limit), &if_isoutofbounds, &if_isinbounds);
1669
1670 Bind(&if_isinbounds);
1671 {
1672 var_result.Bind(value_int);
1673 Goto(&out);
1674 }
1675
1676 Bind(&if_isoutofbounds);
1677 {
1678 Node* const zero = SmiConstant(Smi::kZero);
1679 var_result.Bind(
1680 SelectTaggedConstant(SmiLessThan(value_int, zero), zero, limit));
1681 Goto(&out);
1682 }
1683 }
1684
1685 Bind(&if_isnotsmi);
1686 {
1687 // {value} is a heap number - in this case, it is definitely out of bounds.
1688 CSA_ASSERT(this, IsHeapNumberMap(LoadMap(value_int)));
1689
1690 Node* const float_zero = Float64Constant(0.);
1691 Node* const smi_zero = SmiConstant(Smi::kZero);
1692 Node* const value_float = LoadHeapNumberValue(value_int);
1693 var_result.Bind(SelectTaggedConstant(
1694 Float64LessThan(value_float, float_zero), smi_zero, limit));
1695 Goto(&out);
1696 }
1697
1698 Bind(&out);
1699 return var_result.value();
1700 }
1701
1702 // ES6 section 21.1.3.19 String.prototype.substring ( start, end )
TF_BUILTIN(StringPrototypeSubstring,StringBuiltinsAssembler)1703 TF_BUILTIN(StringPrototypeSubstring, StringBuiltinsAssembler) {
1704 Label out(this);
1705
1706 Variable var_start(this, MachineRepresentation::kTagged);
1707 Variable var_end(this, MachineRepresentation::kTagged);
1708
1709 Node* const receiver = Parameter(0);
1710 Node* const start = Parameter(1);
1711 Node* const end = Parameter(2);
1712 Node* const context = Parameter(5);
1713
1714 // Check that {receiver} is coercible to Object and convert it to a String.
1715 Node* const string =
1716 ToThisString(context, receiver, "String.prototype.substring");
1717
1718 Node* const length = LoadStringLength(string);
1719
1720 // Conversion and bounds-checks for {start}.
1721 var_start.Bind(ToSmiBetweenZeroAnd(context, start, length));
1722
1723 // Conversion and bounds-checks for {end}.
1724 {
1725 var_end.Bind(length);
1726 GotoIf(WordEqual(end, UndefinedConstant()), &out);
1727
1728 var_end.Bind(ToSmiBetweenZeroAnd(context, end, length));
1729
1730 Label if_endislessthanstart(this);
1731 Branch(SmiLessThan(var_end.value(), var_start.value()),
1732 &if_endislessthanstart, &out);
1733
1734 Bind(&if_endislessthanstart);
1735 {
1736 Node* const tmp = var_end.value();
1737 var_end.Bind(var_start.value());
1738 var_start.Bind(tmp);
1739 Goto(&out);
1740 }
1741 }
1742
1743 Bind(&out);
1744 {
1745 Node* result =
1746 SubString(context, string, var_start.value(), var_end.value());
1747 Return(result);
1748 }
1749 }
1750
// String.prototype.startsWith ( searchString [ , position ] )
//
// Returns the true value when the receiver string contains the string form of
// argument 1 at the start index derived from argument 2 (0 when undefined),
// and the false value otherwise. A RegExp as argument 1 raises a TypeError.
BUILTIN(StringPrototypeStartsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(str, "String.prototype.startsWith");

  // Check if the search string is a regExp and fail if it is.
  Handle<Object> search_arg = args.atOrUndefined(isolate, 1);
  Maybe<bool> regexp_check = RegExpUtils::IsRegExp(isolate, search_arg);
  if (regexp_check.IsNothing()) {
    // IsRegExp itself failed; the exception is already pending.
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (regexp_check.FromJust()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
                              isolate->factory()->NewStringFromStaticChars(
                                  "String.prototype.startsWith")));
  }

  Handle<String> needle;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, needle,
                                     Object::ToString(isolate, search_arg));

  // An undefined position means "compare from the very beginning".
  int start_index = 0;
  Handle<Object> position_arg = args.atOrUndefined(isolate, 2);
  if (!position_arg->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_arg, Object::ToInteger(isolate, position_arg));
    start_index = str->ToValidIndex(*position_arg);
  }

  // The needle cannot match if it would run past the end of the receiver.
  const int needle_length = needle->length();
  if (start_index + needle_length > str->length()) {
    return isolate->heap()->false_value();
  }

  FlatStringReader str_reader(isolate, String::Flatten(str));
  FlatStringReader needle_reader(isolate, String::Flatten(needle));

  // Character-by-character comparison; bail out on the first mismatch.
  for (int offset = 0; offset < needle_length; offset++) {
    if (str_reader.Get(start_index + offset) != needle_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
1797
1798 // ES6 section 21.1.3.25 String.prototype.toString ()
TF_BUILTIN(StringPrototypeToString,CodeStubAssembler)1799 TF_BUILTIN(StringPrototypeToString, CodeStubAssembler) {
1800 Node* receiver = Parameter(0);
1801 Node* context = Parameter(3);
1802
1803 Node* result = ToThisValue(context, receiver, PrimitiveType::kString,
1804 "String.prototype.toString");
1805 Return(result);
1806 }
1807
1808 // ES6 section 21.1.3.27 String.prototype.trim ()
BUILTIN(StringPrototypeTrim)1809 BUILTIN(StringPrototypeTrim) {
1810 HandleScope scope(isolate);
1811 TO_THIS_STRING(string, "String.prototype.trim");
1812 return *String::Trim(string, String::kTrim);
1813 }
1814
1815 // Non-standard WebKit extension
BUILTIN(StringPrototypeTrimLeft)1816 BUILTIN(StringPrototypeTrimLeft) {
1817 HandleScope scope(isolate);
1818 TO_THIS_STRING(string, "String.prototype.trimLeft");
1819 return *String::Trim(string, String::kTrimLeft);
1820 }
1821
1822 // Non-standard WebKit extension
BUILTIN(StringPrototypeTrimRight)1823 BUILTIN(StringPrototypeTrimRight) {
1824 HandleScope scope(isolate);
1825 TO_THIS_STRING(string, "String.prototype.trimRight");
1826 return *String::Trim(string, String::kTrimRight);
1827 }
1828
1829 // ES6 section 21.1.3.28 String.prototype.valueOf ( )
TF_BUILTIN(StringPrototypeValueOf,CodeStubAssembler)1830 TF_BUILTIN(StringPrototypeValueOf, CodeStubAssembler) {
1831 Node* receiver = Parameter(0);
1832 Node* context = Parameter(3);
1833
1834 Node* result = ToThisValue(context, receiver, PrimitiveType::kString,
1835 "String.prototype.valueOf");
1836 Return(result);
1837 }
1838
// String.prototype[Symbol.iterator]
//
// Converts the receiver to a string and returns a newly allocated
// JSStringIterator that references it, with the next index set to zero.
TF_BUILTIN(StringPrototypeIterator, CodeStubAssembler) {
  Node* const receiver = Parameter(0);
  Node* const context = Parameter(3);

  Node* const string =
      ToThisString(context, receiver, "String.prototype[Symbol.iterator]");

  // The iterator map lives in the native context.
  Node* const iterator_map = LoadContextElement(
      LoadNativeContext(context), Context::STRING_ITERATOR_MAP_INDEX);

  // Allocate the iterator and initialize every field by hand.
  Node* const iterator = Allocate(JSStringIterator::kSize);
  StoreMapNoWriteBarrier(iterator, iterator_map);
  StoreObjectFieldRoot(iterator, JSValue::kPropertiesOffset,
                       Heap::kEmptyFixedArrayRootIndex);
  StoreObjectFieldRoot(iterator, JSObject::kElementsOffset,
                       Heap::kEmptyFixedArrayRootIndex);
  StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kStringOffset,
                                 string);
  // Iteration starts at the first code unit.
  StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kNextIndexOffset,
                                 SmiConstant(Smi::kZero));
  Return(iterator);
}
1862
1863 // Return the |word32| codepoint at {index}. Supports SeqStrings and
1864 // ExternalStrings.
LoadSurrogatePairAt(compiler::Node * string,compiler::Node * length,compiler::Node * index,UnicodeEncoding encoding)1865 compiler::Node* StringBuiltinsAssembler::LoadSurrogatePairAt(
1866 compiler::Node* string, compiler::Node* length, compiler::Node* index,
1867 UnicodeEncoding encoding) {
1868 Label handle_surrogate_pair(this), return_result(this);
1869 Variable var_result(this, MachineRepresentation::kWord32);
1870 Variable var_trail(this, MachineRepresentation::kWord32);
1871 var_result.Bind(StringCharCodeAt(string, index));
1872 var_trail.Bind(Int32Constant(0));
1873
1874 GotoIf(Word32NotEqual(Word32And(var_result.value(), Int32Constant(0xFC00)),
1875 Int32Constant(0xD800)),
1876 &return_result);
1877 Node* next_index = SmiAdd(index, SmiConstant(Smi::FromInt(1)));
1878
1879 GotoIfNot(SmiLessThan(next_index, length), &return_result);
1880 var_trail.Bind(StringCharCodeAt(string, next_index));
1881 Branch(Word32Equal(Word32And(var_trail.value(), Int32Constant(0xFC00)),
1882 Int32Constant(0xDC00)),
1883 &handle_surrogate_pair, &return_result);
1884
1885 Bind(&handle_surrogate_pair);
1886 {
1887 Node* lead = var_result.value();
1888 Node* trail = var_trail.value();
1889
1890 // Check that this path is only taken if a surrogate pair is found
1891 CSA_SLOW_ASSERT(this,
1892 Uint32GreaterThanOrEqual(lead, Int32Constant(0xD800)));
1893 CSA_SLOW_ASSERT(this, Uint32LessThan(lead, Int32Constant(0xDC00)));
1894 CSA_SLOW_ASSERT(this,
1895 Uint32GreaterThanOrEqual(trail, Int32Constant(0xDC00)));
1896 CSA_SLOW_ASSERT(this, Uint32LessThan(trail, Int32Constant(0xE000)));
1897
1898 switch (encoding) {
1899 case UnicodeEncoding::UTF16:
1900 var_result.Bind(Word32Or(
1901 // Need to swap the order for big-endian platforms
1902 #if V8_TARGET_BIG_ENDIAN
1903 Word32Shl(lead, Int32Constant(16)), trail));
1904 #else
1905 Word32Shl(trail, Int32Constant(16)), lead));
1906 #endif
1907 break;
1908
1909 case UnicodeEncoding::UTF32: {
1910 // Convert UTF16 surrogate pair into |word32| code point, encoded as
1911 // UTF32.
1912 Node* surrogate_offset =
1913 Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);
1914
1915 // (lead << 10) + trail + SURROGATE_OFFSET
1916 var_result.Bind(Int32Add(WordShl(lead, Int32Constant(10)),
1917 Int32Add(trail, surrogate_offset)));
1918 break;
1919 }
1920 }
1921 Goto(&return_result);
1922 }
1923
1924 Bind(&return_result);
1925 return var_result.value();
1926 }
1927
// %StringIteratorPrototype%.next
//
// Validates that the receiver is a JSStringIterator, reads the character at
// its current position (combining a surrogate pair into one string), advances
// the stored position by the length of that string, and returns a
// JSIteratorResult. Once the position has reached the string length the
// result carries value = undefined and done = true.
TF_BUILTIN(StringIteratorPrototypeNext, StringBuiltinsAssembler) {
  // Start out with the "exhausted" answer; overwritten below if there is
  // still something to produce.
  Variable var_value(this, MachineRepresentation::kTagged);
  Variable var_done(this, MachineRepresentation::kTagged);
  var_value.Bind(UndefinedConstant());
  var_done.Bind(BooleanConstant(true));

  Label bad_receiver(this), build_result(this);

  Node* const iterator = Parameter(0);
  Node* const context = Parameter(3);

  // Only heap objects of instance type JS_STRING_ITERATOR_TYPE are accepted.
  GotoIf(TaggedIsSmi(iterator), &bad_receiver);
  GotoIfNot(Word32Equal(LoadInstanceType(iterator),
                        Int32Constant(JS_STRING_ITERATOR_TYPE)),
            &bad_receiver);

  Node* const string =
      LoadObjectField(iterator, JSStringIterator::kStringOffset);
  Node* const position =
      LoadObjectField(iterator, JSStringIterator::kNextIndexOffset);
  Node* const string_length = LoadObjectField(string, String::kLengthOffset);

  // Past the end: report completion with the defaults set up above.
  GotoIfNot(SmiLessThan(position, string_length), &build_result);

  // Otherwise produce the next code point as a string and step past it.
  {
    UnicodeEncoding encoding = UnicodeEncoding::UTF16;
    Node* const code_units =
        LoadSurrogatePairAt(string, string_length, position, encoding);
    Node* const value = StringFromCodePoint(code_units, encoding);
    var_value.Bind(value);
    // The produced string's own length is the amount to advance by.
    Node* const value_length = LoadObjectField(value, String::kLengthOffset);
    StoreObjectFieldNoWriteBarrier(iterator, JSStringIterator::kNextIndexOffset,
                                   SmiAdd(position, value_length));
    var_done.Bind(BooleanConstant(false));
    Goto(&build_result);
  }

  Bind(&build_result);
  {
    Node* const result_map = LoadContextElement(
        LoadNativeContext(context), Context::ITERATOR_RESULT_MAP_INDEX);
    Node* const result = Allocate(JSIteratorResult::kSize);
    StoreMapNoWriteBarrier(result, result_map);
    StoreObjectFieldRoot(result, JSIteratorResult::kPropertiesOffset,
                         Heap::kEmptyFixedArrayRootIndex);
    StoreObjectFieldRoot(result, JSIteratorResult::kElementsOffset,
                         Heap::kEmptyFixedArrayRootIndex);
    StoreObjectFieldNoWriteBarrier(result, JSIteratorResult::kValueOffset,
                                   var_value.value());
    StoreObjectFieldNoWriteBarrier(result, JSIteratorResult::kDoneOffset,
                                   var_done.value());
    Return(result);
  }

  Bind(&bad_receiver);
  {
    // The {receiver} is not a valid JSStringIterator.
    CallRuntime(Runtime::kThrowIncompatibleMethodReceiver, context,
                HeapConstant(factory()->NewStringFromAsciiChecked(
                    "String Iterator.prototype.next", TENURED)),
                iterator);
    Unreachable();
  }
}
1993
1994 namespace {
1995
ToUpperOverflows(uc32 character)1996 inline bool ToUpperOverflows(uc32 character) {
1997 // y with umlauts and the micro sign are the only characters that stop
1998 // fitting into one-byte when converting to uppercase.
1999 static const uc32 yuml_code = 0xff;
2000 static const uc32 micro_code = 0xb5;
2001 return (character == yuml_code || character == micro_code);
2002 }
2003
2004 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String * string,SeqString * result,int result_length,unibrow::Mapping<Converter,128> * mapping)2005 MUST_USE_RESULT static Object* ConvertCaseHelper(
2006 Isolate* isolate, String* string, SeqString* result, int result_length,
2007 unibrow::Mapping<Converter, 128>* mapping) {
2008 DisallowHeapAllocation no_gc;
2009 // We try this twice, once with the assumption that the result is no longer
2010 // than the input and, if that assumption breaks, again with the exact
2011 // length. This may not be pretty, but it is nicer than what was here before
2012 // and I hereby claim my vaffel-is.
2013 //
2014 // NOTE: This assumes that the upper/lower case of an ASCII
2015 // character is also ASCII. This is currently the case, but it
2016 // might break in the future if we implement more context and locale
2017 // dependent upper/lower conversions.
2018 bool has_changed_character = false;
2019
2020 // Convert all characters to upper case, assuming that they will fit
2021 // in the buffer
2022 StringCharacterStream stream(string);
2023 unibrow::uchar chars[Converter::kMaxWidth];
2024 // We can assume that the string is not empty
2025 uc32 current = stream.GetNext();
2026 bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
2027 for (int i = 0; i < result_length;) {
2028 bool has_next = stream.HasMore();
2029 uc32 next = has_next ? stream.GetNext() : 0;
2030 int char_length = mapping->get(current, next, chars);
2031 if (char_length == 0) {
2032 // The case conversion of this character is the character itself.
2033 result->Set(i, current);
2034 i++;
2035 } else if (char_length == 1 &&
2036 (ignore_overflow || !ToUpperOverflows(current))) {
2037 // Common case: converting the letter resulted in one character.
2038 DCHECK(static_cast<uc32>(chars[0]) != current);
2039 result->Set(i, chars[0]);
2040 has_changed_character = true;
2041 i++;
2042 } else if (result_length == string->length()) {
2043 bool overflows = ToUpperOverflows(current);
2044 // We've assumed that the result would be as long as the
2045 // input but here is a character that converts to several
2046 // characters. No matter, we calculate the exact length
2047 // of the result and try the whole thing again.
2048 //
2049 // Note that this leaves room for optimization. We could just
2050 // memcpy what we already have to the result string. Also,
2051 // the result string is the last object allocated we could
2052 // "realloc" it and probably, in the vast majority of cases,
2053 // extend the existing string to be able to hold the full
2054 // result.
2055 int next_length = 0;
2056 if (has_next) {
2057 next_length = mapping->get(next, 0, chars);
2058 if (next_length == 0) next_length = 1;
2059 }
2060 int current_length = i + char_length + next_length;
2061 while (stream.HasMore()) {
2062 current = stream.GetNext();
2063 overflows |= ToUpperOverflows(current);
2064 // NOTE: we use 0 as the next character here because, while
2065 // the next character may affect what a character converts to,
2066 // it does not in any case affect the length of what it convert
2067 // to.
2068 int char_length = mapping->get(current, 0, chars);
2069 if (char_length == 0) char_length = 1;
2070 current_length += char_length;
2071 if (current_length > String::kMaxLength) {
2072 AllowHeapAllocation allocate_error_and_return;
2073 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
2074 NewInvalidStringLengthError());
2075 }
2076 }
2077 // Try again with the real length. Return signed if we need
2078 // to allocate a two-byte string for to uppercase.
2079 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
2080 : Smi::FromInt(current_length);
2081 } else {
2082 for (int j = 0; j < char_length; j++) {
2083 result->Set(i, chars[j]);
2084 i++;
2085 }
2086 has_changed_character = true;
2087 }
2088 current = next;
2089 }
2090 if (has_changed_character) {
2091 return result;
2092 } else {
2093 // If we didn't actually change anything in doing the conversion
2094 // we simple return the result and let the converted string
2095 // become garbage; there is no reason to keep two identical strings
2096 // alive.
2097 return string;
2098 }
2099 }
2100
2101 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)2102 MUST_USE_RESULT static Object* ConvertCase(
2103 Handle<String> s, Isolate* isolate,
2104 unibrow::Mapping<Converter, 128>* mapping) {
2105 s = String::Flatten(s);
2106 int length = s->length();
2107 // Assume that the string is not empty; we need this assumption later
2108 if (length == 0) return *s;
2109
2110 // Simpler handling of ASCII strings.
2111 //
2112 // NOTE: This assumes that the upper/lower case of an ASCII
2113 // character is also ASCII. This is currently the case, but it
2114 // might break in the future if we implement more context and locale
2115 // dependent upper/lower conversions.
2116 if (s->IsOneByteRepresentationUnderneath()) {
2117 // Same length as input.
2118 Handle<SeqOneByteString> result =
2119 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
2120 DisallowHeapAllocation no_gc;
2121 String::FlatContent flat_content = s->GetFlatContent();
2122 DCHECK(flat_content.IsFlat());
2123 bool has_changed_character = false;
2124 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
2125 reinterpret_cast<char*>(result->GetChars()),
2126 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
2127 length, &has_changed_character);
2128 // If not ASCII, we discard the result and take the 2 byte path.
2129 if (index_to_first_unprocessed == length)
2130 return has_changed_character ? *result : *s;
2131 }
2132
2133 Handle<SeqString> result; // Same length as input.
2134 if (s->IsOneByteRepresentation()) {
2135 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
2136 } else {
2137 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
2138 }
2139
2140 Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
2141 if (answer->IsException(isolate) || answer->IsString()) return answer;
2142
2143 DCHECK(answer->IsSmi());
2144 length = Smi::cast(answer)->value();
2145 if (s->IsOneByteRepresentation() && length > 0) {
2146 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
2147 isolate, result, isolate->factory()->NewRawOneByteString(length));
2148 } else {
2149 if (length < 0) length = -length;
2150 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
2151 isolate, result, isolate->factory()->NewRawTwoByteString(length));
2152 }
2153 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
2154 }
2155
2156 } // namespace
2157
// String.prototype.toLocaleLowerCase
//
// Converts the receiver to a string and runs ConvertCase on it with the
// isolate's to-lower mapping.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver_string, "String.prototype.toLocaleLowerCase");
  return ConvertCase(receiver_string, isolate,
                     isolate->runtime_state()->to_lower_mapping());
}
2164
// String.prototype.toLocaleUpperCase
//
// Converts the receiver to a string and runs ConvertCase on it with the
// isolate's to-upper mapping.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver_string, "String.prototype.toLocaleUpperCase");
  return ConvertCase(receiver_string, isolate,
                     isolate->runtime_state()->to_upper_mapping());
}
2171
// String.prototype.toLowerCase
//
// Converts the receiver to a string and runs ConvertCase on it with the
// isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver_string, "String.prototype.toLowerCase");
  return ConvertCase(receiver_string, isolate,
                     isolate->runtime_state()->to_lower_mapping());
}
2178
// String.prototype.toUpperCase
//
// Converts the receiver to a string and runs ConvertCase on it with the
// isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver_string, "String.prototype.toUpperCase");
  return ConvertCase(receiver_string, isolate,
                     isolate->runtime_state()->to_upper_mapping());
}
2185
2186 } // namespace internal
2187 } // namespace v8
2188