• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include 'src/builtins/builtins-string-gen.h'
6
// Abstract base class of the string classes declared in this file. Seven
// instance-type bits are reserved for it; StringInstanceType declares how
// those bits are laid out.
@abstract
@reserveBitsInInstanceType(7)
extern class String extends Name {
  // Reinterprets the map's instance type as the StringInstanceType bitfield.
  macro StringInstanceType(): StringInstanceType {
    const rawType: uint16 = Convert<uint16>(this.map.instance_type);
    return %RawDownCast<StringInstanceType>(rawType);
  }

  // Reads the is_not_internalized bit of this string's instance type.
  macro IsNotInternalized(): bool {
    const instanceType: StringInstanceType = this.StringInstanceType();
    return instanceType.is_not_internalized;
  }

  // Const field; also used as the element count of `chars` in the
  // sequential string classes.
  const length: int32;
}
21
// Values of the 3-bit `representation` field of StringInstanceType. The tag
// names mirror the representation classes declared in this file (SeqString,
// ConsString, ExternalString, SlicedString, ThinString). The numeric values
// are defined externally (extern enum).
extern enum StringRepresentationTag extends uint32 {
  kSeqStringTag,
  kConsStringTag,
  kExternalStringTag,
  kSlicedStringTag,
  kThinStringTag
}
29
// Bitfield view of a string's instance type, as produced by
// String::StringInstanceType(). The declared fields use 3 + 1 + 1 + 1 + 1 = 7
// bits of the underlying uint16.
bitfield struct StringInstanceType extends uint16 {
  // Which representation class the string uses.
  representation: StringRepresentationTag: 3 bit;
  // Read by ConsString::IsOneByteRepresentation().
  is_one_byte: bool: 1 bit;
  // Read by the external string GetChars() macros.
  is_uncached: bool: 1 bit;
  // Read by String::IsNotInternalized().
  is_not_internalized: bool: 1 bit;
  is_shared: bool: 1 bit;
}
37
// A string made of two parts, `first` followed by `second`.
@generateBodyDescriptor
@doNotGenerateCast
extern class ConsString extends String {
  // Corresponds to String::IsFlat() in the C++ runtime.
  // A cons string counts as flat when its second part is empty.
  macro IsFlat(): bool {
    const secondLength: int32 = this.second.length;
    return secondLength == 0;
  }

  // Reads the is_one_byte bit of this string's instance type.
  macro IsOneByteRepresentation(): bool {
    const instanceType: StringInstanceType = this.StringInstanceType();
    return instanceType.is_one_byte;
  }

  first: String;
  second: String;
}
53
// Abstract base of ExternalOneByteString and ExternalTwoByteString.
@abstract
@doNotGenerateCast
extern class ExternalString extends String {
  resource: ExternalPointer;

  // WARNING: This field is missing for uncached external strings.
  // The GetChars() macros of the subclasses therefore check is_uncached
  // before going through the resource_data_ptr accessor.
  resource_data: ExternalPointer;
}
61
// Field-style accessors for external strings, implemented outside Torque.
// `.resource_data_ptr` is overloaded so that the one-byte and two-byte
// subclasses yield a pointer typed with their character width.
extern operator '.resource_ptr' macro
LoadExternalStringResourcePtr(ExternalString): RawPtr;

extern operator '.resource_data_ptr' macro
LoadExternalStringResourceDataPtr(ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro
LoadExternalStringResourceDataPtr(ExternalOneByteString): RawPtr<char8>;
extern operator '.resource_data_ptr' macro
LoadExternalStringResourceDataPtr(ExternalTwoByteString): RawPtr<char16>;
70
// Externally implemented character-pointer getters. In this file they are
// used by the GetChars() macros for strings whose is_uncached bit is set.
extern macro ExternalOneByteStringGetChars(
    ExternalOneByteString): RawPtr<char8>;
extern macro ExternalTwoByteStringGetChars(
    ExternalTwoByteString): RawPtr<char16>;
75
@doNotGenerateCast
extern class ExternalOneByteString extends ExternalString {
  // Returns a raw pointer to the one-byte character data.
  macro GetChars(): RawPtr<char8> {
    // Cached strings can use the resource_data_ptr accessor directly.
    if (!this.StringInstanceType().is_uncached) {
      return this.resource_data_ptr;
    }
    // Uncached strings go through the external getter instead.
    return ExternalOneByteStringGetChars(this);
  }
}
86
@doNotGenerateCast
extern class ExternalTwoByteString extends ExternalString {
  // Returns a raw pointer to the two-byte character data.
  macro GetChars(): RawPtr<char16> {
    // Cached strings can use the resource_data_ptr accessor directly.
    if (!this.StringInstanceType().is_uncached) {
      return this.resource_data_ptr;
    }
    // Uncached strings go through the external getter instead.
    return ExternalTwoByteStringGetChars(this);
  }
}
97
// Declares no fields or macros beyond those inherited from String.
@doNotGenerateCast
extern class InternalizedString extends String {}
101
// Abstract base of SeqOneByteString and SeqTwoByteString; adds no fields.
@abstract
@doNotGenerateCast
extern class SeqString extends String {}
// Sequential string whose characters are stored inline as char8.
@generateBodyDescriptor
@doNotGenerateCast
extern class SeqOneByteString extends SeqString {
  // `length` elements, where `length` is the field inherited from String.
  const chars[length]: char8;
}
// Sequential string whose characters are stored inline as char16.
@generateBodyDescriptor
@doNotGenerateCast
extern class SeqTwoByteString extends SeqString {
  // `length` elements, where `length` is the field inherited from String.
  const chars[length]: char16;
}
116
// A view into another string. StringToSlice resolves it by adding `offset`
// to its running offset and continuing with `parent`.
@generateBodyDescriptor
@doNotGenerateCast
extern class SlicedString extends String {
  parent: String;
  offset: Smi;
}
123
// A string that forwards to another string. Flatten() and StringToSlice()
// both continue with `actual` when they encounter one.
@generateBodyDescriptor
@doNotGenerateCast
extern class ThinString extends String {
  actual: String;
}
129
// A direct string is one whose contents CSA can read without calling into
// the C++ runtime. See also: ToDirectStringAssembler.
type DirectString extends String;
133
// Allocates a SeqOneByteString of `length` characters whose contents are
// taken from `content`. `length` must be non-zero and at most
// kStringMaxLength (checked by dcheck only).
macro AllocateNonEmptySeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString {
  dcheck(length != 0 && length <= kStringMaxLength);
  // The `length` field is an int32, so convert the unsigned argument.
  const signedLength: int32 = Signed(length);
  return new SeqOneByteString{
    map: kOneByteStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}
144
// Allocates a SeqTwoByteString of `length` characters whose contents are
// taken from `content`. `length` must be non-zero and at most
// kStringMaxLength (checked by dcheck only).
macro AllocateNonEmptySeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString {
  dcheck(length > 0 && length <= kStringMaxLength);
  // The `length` field is an int32, so convert the unsigned argument.
  const signedLength: int32 = Signed(length);
  return new SeqTwoByteString{
    map: kStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}
155
// Convenience overload: forwards to the two-argument version with
// UninitializedIterator{} as the content.
macro AllocateNonEmptySeqOneByteString(length: uint32): SeqOneByteString {
  const noContent = UninitializedIterator{};
  return AllocateNonEmptySeqOneByteString(length, noContent);
}
// Convenience overload: forwards to the two-argument version with
// UninitializedIterator{} as the content.
macro AllocateNonEmptySeqTwoByteString(length: uint32): SeqTwoByteString {
  const noContent = UninitializedIterator{};
  return AllocateNonEmptySeqTwoByteString(length, noContent);
}
162
// Like AllocateNonEmptySeqOneByteString, but also accepts length 0, in which
// case kEmptyString is returned and nothing is allocated here.
macro AllocateSeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString|EmptyString {
  if (length != 0) {
    return AllocateNonEmptySeqOneByteString(length, content);
  }
  return kEmptyString;
}
168
// Like AllocateNonEmptySeqTwoByteString, but also accepts length 0, in which
// case kEmptyString is returned and nothing is allocated here.
macro AllocateSeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString|EmptyString {
  if (length != 0) {
    return AllocateNonEmptySeqTwoByteString(length, content);
  }
  return kEmptyString;
}
174
// Exported convenience overload: forwards to the two-argument version with
// UninitializedIterator{} as the content. Returns kEmptyString for length 0.
@export
macro AllocateSeqOneByteString(length: uint32): SeqOneByteString|EmptyString {
  const noContent = UninitializedIterator{};
  return AllocateSeqOneByteString(length, noContent);
}
179
// Exported convenience overload: forwards to the two-argument version with
// UninitializedIterator{} as the content. Returns kEmptyString for length 0.
@export
macro AllocateSeqTwoByteString(length: uint32): SeqTwoByteString|EmptyString {
  const noContent = UninitializedIterator{};
  return AllocateSeqTwoByteString(length, noContent);
}
184
// Externally implemented copy routines. StringSlowFlatten calls them as
// (source string, destination pointer, 0, source length).
// NOTE(review): the two int32 arguments look like a start/end (or
// start/length) range into the source - confirm against the C++ side.
extern macro StringWriteToFlatOneByte(
    String, RawPtr<char8>, int32, int32): void;
extern macro StringWriteToFlatTwoByte(
    String, RawPtr<char16>, int32, int32): void;
189
// Corresponds to String::SlowFlatten in the C++ runtime.
//
// Returns a flat string with the contents of `cons`. Leading cons strings
// with an empty first part are skipped. If that leads to a second part that
// is not a ConsString, or is an already-flat ConsString, the result is
// Flatten() of that second part. Otherwise a new sequential string of
// cons.length characters is allocated and filled through
// StringWriteToFlat{One,Two}Byte, and `cons` is rewritten in place
// (first = flat copy, second = kEmptyString), which makes cons.IsFlat() true.
builtin StringSlowFlatten(cons: ConsString): String {
  // TurboFan can create cons strings with empty first parts.
  let cons = cons;
  while (cons.first.length == 0) {
    // We do not want to call this function recursively. Therefore we call
    // String::Flatten only in those cases where String::SlowFlatten is not
    // called again.
    try {
      const second = Cast<ConsString>(cons.second) otherwise FoundFlatString;
      if (second.IsFlat()) goto FoundFlatString;
      cons = second;
    } label FoundFlatString {
      return Flatten(cons.second);
    }
  }

  // `cons` is final from here on; read its (const) length once instead of
  // reloading it for every use below.
  const flatLength: int32 = cons.length;
  let flat: String;
  if (cons.IsOneByteRepresentation()) {
    const allocated = AllocateNonEmptySeqOneByteString(Unsigned(flatLength));
    StringWriteToFlatOneByte(
        cons, (&allocated.chars).GCUnsafeStartPointer(), 0, flatLength);
    flat = allocated;
  } else {
    // AllocateNonEmptySeqTwoByteString is declared to return
    // SeqTwoByteString, so no cast is needed (same as the one-byte branch).
    const allocated = AllocateNonEmptySeqTwoByteString(Unsigned(flatLength));
    StringWriteToFlatTwoByte(
        cons, (&allocated.chars).GCUnsafeStartPointer(), 0, flatLength);
    flat = allocated;
  }
  // Turn `cons` into a flat cons string that points at the copy.
  cons.first = flat;
  cons.second = kEmptyString;
  return flat;
}
224
// Corresponds to String::Flatten in the C++ runtime.
// Cons strings are delegated to the ConsString overload, thin strings are
// replaced by the string they forward to, and every other string is
// returned unchanged.
macro Flatten(string: String): String {
  typeswitch (string) {
    case (consString: ConsString): {
      return Flatten(consString);
    }
    case (thin: ThinString): {
      // The target of a thin string is expected never to be a cons string.
      dcheck(!Is<ConsString>(thin.actual));
      return thin.actual;
    }
    case (unchanged: String): {
      return unchanged;
    }
  }
}
// ConsString overload: a flat cons string keeps its whole content in
// `first`; anything else is handed to the StringSlowFlatten builtin.
macro Flatten(cons: ConsString): String {
  if (!cons.IsFlat()) {
    return StringSlowFlatten(cons);
  }
  return cons.first;
}
244
// Get a slice to the string data, flatten only if unavoidable for this.
//
// Never returns normally: control leaves through OneByte or TwoByte with a
// slice of `string.length` characters. Thin, cons and sliced strings are
// unwrapped in a loop until a sequential or external string is reached.
macro StringToSlice(string: String): never labels OneByte(ConstSlice<char8>),
    TwoByte(ConstSlice<char16>) {
  // The slice length is fixed by the string we were originally given.
  const length = Convert<intptr>(string.length);
  let current = string;
  // Start of the requested characters, accumulated over sliced strings.
  let offset: intptr = 0;
  while (true) {
    typeswitch (current) {
      case (seq: SeqOneByteString): {
        const slice = Subslice(&seq.chars, offset, length)
            otherwise unreachable;
        goto OneByte(slice);
      }
      case (seq: SeqTwoByteString): {
        const slice = Subslice(&seq.chars, offset, length)
            otherwise unreachable;
        goto TwoByte(slice);
      }
      case (thin: ThinString): {
        current = thin.actual;
      }
      case (cons: ConsString): {
        current = Flatten(cons);
      }
      case (sliced: SlicedString): {
        offset += Convert<intptr>(sliced.offset);
        current = sliced.parent;
      }
      case (ext: ExternalOneByteString): {
        const data = torque_internal::unsafe::NewOffHeapConstSlice(
            ext.GetChars(), Convert<intptr>(ext.length));
        const slice = Subslice(data, offset, length) otherwise unreachable;
        goto OneByte(slice);
      }
      case (ext: ExternalTwoByteString): {
        const data = torque_internal::unsafe::NewOffHeapConstSlice(
            ext.GetChars(), Convert<intptr>(ext.length));
        const slice = Subslice(data, offset, length) otherwise unreachable;
        goto TwoByte(slice);
      }
      case (String): {
        unreachable;
      }
    }
  }
  VerifiedUnreachable();
}
286
// Dispatch on the slice type of two different strings.
// Both strings are turned into slices via StringToSlice, and Call(f, ...) is
// invoked with the matching one of the four char8/char16 combinations.
macro TwoStringsToSlices<Result: type, Functor: type>(
    s1: String, s2: String, f: Functor): Result {
  try {
    StringToSlice(s1) otherwise FirstOneByte, FirstTwoByte;
  } label FirstOneByte(first8: ConstSlice<char8>) {
    try {
      StringToSlice(s2) otherwise SecondOneByte, SecondTwoByte;
    } label SecondOneByte(second8: ConstSlice<char8>) {
      return Call(f, first8, second8);
    } label SecondTwoByte(second16: ConstSlice<char16>) {
      return Call(f, first8, second16);
    }
  } label FirstTwoByte(first16: ConstSlice<char16>) {
    try {
      StringToSlice(s2) otherwise SecondOneByte, SecondTwoByte;
    } label SecondOneByte(second8: ConstSlice<char8>) {
      return Call(f, first16, second8);
    } label SecondTwoByte(second16: ConstSlice<char16>) {
      return Call(f, first16, second16);
    }
  }
}
310
// Compile-time check that kStringMaxLengthUintptr is smaller than
// kSmiMaxValue.
macro StaticAssertStringLengthFitsSmi(): void {
  const kLengthBelowSmiMax: constexpr bool =
      kStringMaxLengthUintptr < kSmiMaxValue;
  static_assert(kLengthBelowSmiMax);
}
316
// Search primitives implemented in StringBuiltinsAssembler. The
// AbstractStringIndexOf overloads in this file call the five-argument ones as
// (subject, subjectLen, search, searchLen, fromIndex) and the four-argument
// one as (subject, subjectLen, search, fromIndex).
extern macro StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char8>, intptr, intptr): intptr;

extern macro StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr, intptr): intptr;

extern macro StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char16>, intptr, intptr): intptr;

extern macro StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char16>, intptr, intptr): intptr;

// Variant used when the one-byte search string has length 1.
extern macro StringBuiltinsAssembler::SearchOneByteInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr): intptr;
327
// Two-byte subject, one-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchOneByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}
// One-byte subject, one-byte search string. A search string of length 1 is
// routed to the dedicated single-character search.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  if (searchLen != 1) {
    return SearchOneByteStringInOneByteString(
        subject, subjectLen, search, searchLen, fromIndex);
  }
  return SearchOneByteInOneByteString(subject, subjectLen, search, fromIndex);
}
// Two-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchTwoByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}
// One-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchTwoByteStringInOneByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}
355
// Functor handed to TwoStringsToSlices by AbstractStringIndexOf; it carries
// the start index of the search into the slice-typed Call() overload.
struct AbstractStringIndexOfFunctor {
  fromIndex: Smi;
}
// Ideally, this would be a method of AbstractStringIndexOfFunctor, but
// currently methods don't support templates.
//
// Runs the raw-pointer AbstractStringIndexOf overload that matches the
// character types of the two slices and converts its result to a Smi.
macro Call<A: type, B: type>(
    self: AbstractStringIndexOfFunctor, string: ConstSlice<A>,
    searchStr: ConstSlice<B>): Smi {
  const start: intptr = Convert<intptr>(self.fromIndex);
  const position: intptr = AbstractStringIndexOf(
      string.GCUnsafeStartPointer(), string.length,
      searchStr.GCUnsafeStartPointer(), searchStr.length, start);
  return Convert<Smi>(position);
}
369
// Searches `string` for `searchString`, starting at `fromIndex`, and returns
// the search result as a Smi. Two cases are answered without searching: an
// empty search string that starts within bounds yields `fromIndex`, and a
// search string that cannot fit in the remaining characters yields -1.
macro AbstractStringIndexOf(implicit context: Context)(
    string: String, searchString: String, fromIndex: Smi): Smi {
  const start: intptr = SmiUntag(fromIndex);
  const needleLength = searchString.length_intptr;
  const haystackLength = string.length_intptr;

  // Special case the empty string.
  if (needleLength == 0 && start <= haystackLength) {
    return fromIndex;
  }

  // Don't bother to search if the searchString would go past the end
  // of the string. This is actually necessary because of runtime
  // checks.
  if (start + needleLength > haystackLength) {
    return -1;
  }

  const functor = AbstractStringIndexOfFunctor{fromIndex: fromIndex};
  return TwoStringsToSlices<Smi>(string, searchString, functor);
}
389
// Builtin entry point for AbstractStringIndexOf. The start position passed
// on is SmiMax(start, 0), so a negative `start` is not forwarded as-is.
builtin StringIndexOf(implicit context: Context)(
    s: String, searchString: String, start: Smi): Smi {
  const clampedStart: Smi = SmiMax(start, 0);
  return AbstractStringIndexOf(s, searchString, clampedStart);
}
394