// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include 'src/builtins/builtins-string-gen.h'

// Abstract base class of every string class declared in this file.
@abstract
@reserveBitsInInstanceType(7)
extern class String extends Name {
  // Views the instance type stored in this string's map as a
  // StringInstanceType bitfield.
  macro StringInstanceType(): StringInstanceType {
    const rawInstanceType: uint16 = Convert<uint16>(this.map.instance_type);
    return %RawDownCast<StringInstanceType>(rawInstanceType);
  }

  // Reads the is_not_internalized bit of the instance type.
  macro IsNotInternalized(): bool {
    const instanceType = this.StringInstanceType();
    return instanceType.is_not_internalized;
  }

  const length: int32;
}

// Values of the `representation` bits in StringInstanceType.
extern enum StringRepresentationTag extends uint32 {
  kSeqStringTag,
  kConsStringTag,
  kExternalStringTag,
  kSlicedStringTag,
  kThinStringTag
}

// Bit layout used to interpret a string's 16-bit instance type.
bitfield struct StringInstanceType extends uint16 {
  representation: StringRepresentationTag: 3 bit;
  is_one_byte: bool: 1 bit;
  is_uncached: bool: 1 bit;
  is_not_internalized: bool: 1 bit;
  is_shared: bool: 1 bit;
}

// A string made of two parts, `first` and `second`.
@generateBodyDescriptor
@doNotGenerateCast
extern class ConsString extends String {
  // Corresponds to String::IsFlat() in the C++ runtime.
  // A cons string counts as flat once its second part is empty.
  macro IsFlat(): bool {
    return this.second.length == 0;
  }

  // Reads the is_one_byte bit of the instance type.
  macro IsOneByteRepresentation(): bool {
    const instanceType = this.StringInstanceType();
    return instanceType.is_one_byte;
  }

  first: String;
  second: String;
}

// Abstract base class of the external string classes below.
@abstract
@doNotGenerateCast
extern class ExternalString extends String {
  resource: ExternalPointer;
  // WARNING: This field is missing for uncached external strings.
  resource_data: ExternalPointer;
}

// Raw-pointer accessors for the two external pointer fields. The
// '.resource_data_ptr' operator is overloaded so that the one-byte and
// two-byte subclasses yield appropriately typed character pointers.
extern operator '.resource_ptr' macro LoadExternalStringResourcePtr(
    ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalOneByteString): RawPtr<char8>;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalTwoByteString): RawPtr<char16>;

// Externally defined character getters; GetChars() below uses them for
// uncached external strings.
extern macro ExternalOneByteStringGetChars(ExternalOneByteString):
    RawPtr<char8>;
extern macro ExternalTwoByteStringGetChars(ExternalTwoByteString):
    RawPtr<char16>;

@doNotGenerateCast
extern class ExternalOneByteString extends ExternalString {
  // Returns a pointer to the one-byte characters. Uncached strings go
  // through the external getter; otherwise the pointer is read through
  // '.resource_data_ptr'.
  macro GetChars(): RawPtr<char8> {
    if (this.StringInstanceType().is_uncached) {
      return ExternalOneByteStringGetChars(this);
    }
    return this.resource_data_ptr;
  }
}

@doNotGenerateCast
extern class ExternalTwoByteString extends ExternalString {
  // Returns a pointer to the two-byte characters. Uncached strings go
  // through the external getter; otherwise the pointer is read through
  // '.resource_data_ptr'.
  macro GetChars(): RawPtr<char16> {
    if (this.StringInstanceType().is_uncached) {
      return ExternalTwoByteStringGetChars(this);
    }
    return this.resource_data_ptr;
  }
}

@doNotGenerateCast
extern class InternalizedString extends String {}

// Abstract base class of the two sequential string classes, which store
// their characters inline in a `chars` array.
@abstract
@doNotGenerateCast
extern class SeqString extends String {}

@generateBodyDescriptor
@doNotGenerateCast
extern class SeqOneByteString extends SeqString {
  const chars[length]: char8;
}

@generateBodyDescriptor
@doNotGenerateCast
extern class SeqTwoByteString extends SeqString {
  const chars[length]: char16;
}

// A string described by a parent string and an offset.
@generateBodyDescriptor
@doNotGenerateCast
extern class SlicedString extends String {
  parent: String;
  offset: Smi;
}

// A string that holds a reference to another string, `actual`.
@generateBodyDescriptor
@doNotGenerateCast
extern class ThinString extends String {
  actual: String;
}

// A direct string can be accessed directly through CSA without going into the
// C++ runtime. See also: ToDirectStringAssembler.
type DirectString extends String;

// Allocates a one-byte sequential string of `length` characters whose
// contents are supplied by the `content` iterator. `length` must be non-zero
// and at most kStringMaxLength (checked with a dcheck).
macro AllocateNonEmptySeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString {
  dcheck(length != 0 && length <= kStringMaxLength);
  const signedLength: int32 = Signed(length);
  return new SeqOneByteString{
    map: kOneByteStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}

// Two-byte counterpart of the macro above: allocates a two-byte sequential
// string of `length` characters filled from `content`. `length` must be
// positive and at most kStringMaxLength (checked with a dcheck).
macro AllocateNonEmptySeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString {
  dcheck(length > 0 && length <= kStringMaxLength);
  const signedLength: int32 = Signed(length);
  return new SeqTwoByteString{
    map: kStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}

// Convenience overloads that pass an UninitializedIterator as the content.
macro AllocateNonEmptySeqOneByteString(length: uint32): SeqOneByteString {
  return AllocateNonEmptySeqOneByteString(length, UninitializedIterator{});
}
macro AllocateNonEmptySeqTwoByteString(length: uint32): SeqTwoByteString {
  return AllocateNonEmptySeqTwoByteString(length, UninitializedIterator{});
}

// Like AllocateNonEmptySeqOneByteString, except that a zero length yields
// kEmptyString instead of a fresh allocation.
macro AllocateSeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString|EmptyString {
  if (length == 0) {
    return kEmptyString;
  }
  return AllocateNonEmptySeqOneByteString(length, content);
}

// Like AllocateNonEmptySeqTwoByteString, except that a zero length yields
// kEmptyString instead of a fresh allocation.
macro AllocateSeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString|EmptyString {
  if (length == 0) {
    return kEmptyString;
  }
  return AllocateNonEmptySeqTwoByteString(length, content);
}

// Exported overloads that pass an UninitializedIterator as the content.
@export
macro AllocateSeqOneByteString(length: uint32): SeqOneByteString|EmptyString {
  return AllocateSeqOneByteString(length, UninitializedIterator{});
}

@export
macro AllocateSeqTwoByteString(length: uint32): SeqTwoByteString|EmptyString {
  return AllocateSeqTwoByteString(length, UninitializedIterator{});
}

// Externally defined writers; StringSlowFlatten below uses them to copy a
// cons string's characters into a freshly allocated sequential string.
extern macro StringWriteToFlatOneByte(
    String, RawPtr<char8>, int32, int32): void;
extern macro StringWriteToFlatTwoByte(
    String, RawPtr<char16>, int32, int32): void;

// Corresponds to String::SlowFlatten in the C++ runtime.
// Copies the characters of `cons` into a new sequential string, rewrites the
// cons string to point at that copy (with an empty second part), and returns
// the copy.
builtin StringSlowFlatten(cons: ConsString): String {
  // TurboFan can create cons strings with empty first parts. Walk down the
  // second parts until a cons string with a non-empty first part is found.
  let current: ConsString = cons;
  while (current.first.length == 0) {
    // We do not want to call this function recursively. Therefore we call
    // String::Flatten only in those cases where String::SlowFlatten is not
    // called again, i.e. when the second part is either not a cons string or
    // an already flat one.
    try {
      const next = Cast<ConsString>(current.second) otherwise FoundFlatString;
      if (next.IsFlat()) goto FoundFlatString;
      current = next;
    } label FoundFlatString {
      return Flatten(current.second);
    }
  }

  const totalLength: int32 = current.length;
  let flat: String;
  if (current.IsOneByteRepresentation()) {
    const oneByteCopy =
        AllocateNonEmptySeqOneByteString(Unsigned(totalLength));
    StringWriteToFlatOneByte(
        current, (&oneByteCopy.chars).GCUnsafeStartPointer(), 0, totalLength);
    flat = oneByteCopy;
  } else {
    const twoByteCopy = UnsafeCast<SeqTwoByteString>(
        AllocateNonEmptySeqTwoByteString(Unsigned(totalLength)));
    StringWriteToFlatTwoByte(
        current, (&twoByteCopy.chars).GCUnsafeStartPointer(), 0, totalLength);
    flat = twoByteCopy;
  }

  // Turn the cons string into a flat one: first part is the copy, second
  // part is empty.
  current.first = flat;
  current.second = kEmptyString;
  return flat;
}

// Corresponds to String::Flatten in the C++ runtime.
// Cons strings are flattened, thin strings are unwrapped to their actual
// string, and every other string is returned unchanged.
macro Flatten(string: String): String {
  typeswitch (string) {
    case (consString: ConsString): {
      return Flatten(consString);
    }
    case (thinString: ThinString): {
      dcheck(!Is<ConsString>(thinString.actual));
      return thinString.actual;
    }
    case (unchanged: String): {
      return unchanged;
    }
  }
}

// Flattens a cons string: an already flat one just yields its first part,
// anything else is handed to the StringSlowFlatten builtin.
macro Flatten(cons: ConsString): String {
  if (cons.IsFlat()) {
    return cons.first;
  }
  return StringSlowFlatten(cons);
}

// Get a slice to the string data, flatten only if unavoidable for this.
// Never returns normally: control always leaves through the OneByte or the
// TwoByte label, carrying a slice of `string.length` characters. Thin and
// sliced strings are followed to the string they refer to (accumulating the
// slice offset), cons strings are flattened, and sequential or external
// strings provide the character data.
macro StringToSlice(string: String): never labels OneByte(ConstSlice<char8>),
    TwoByte(ConstSlice<char16>) {
  let current = string;
  let startOffset: intptr = 0;
  // The slice length is fixed by the string passed in, not by whichever
  // underlying string is eventually reached.
  const sliceLength = Convert<intptr>(string.length);
  while (true) {
    typeswitch (current) {
      case (seqOneByte: SeqOneByteString): {
        goto OneByte(
            Subslice(&seqOneByte.chars, startOffset, sliceLength)
            otherwise unreachable);
      }
      case (seqTwoByte: SeqTwoByteString): {
        goto TwoByte(
            Subslice(&seqTwoByte.chars, startOffset, sliceLength)
            otherwise unreachable);
      }
      case (thinString: ThinString): {
        current = thinString.actual;
      }
      case (consString: ConsString): {
        current = Flatten(consString);
      }
      case (slicedString: SlicedString): {
        startOffset += Convert<intptr>(slicedString.offset);
        current = slicedString.parent;
      }
      case (externalOneByte: ExternalOneByteString): {
        const externalChars = torque_internal::unsafe::NewOffHeapConstSlice(
            externalOneByte.GetChars(),
            Convert<intptr>(externalOneByte.length));
        goto OneByte(
            Subslice(externalChars, startOffset, sliceLength)
            otherwise unreachable);
      }
      case (externalTwoByte: ExternalTwoByteString): {
        const externalChars = torque_internal::unsafe::NewOffHeapConstSlice(
            externalTwoByte.GetChars(),
            Convert<intptr>(externalTwoByte.length));
        goto TwoByte(
            Subslice(externalChars, startOffset, sliceLength)
            otherwise unreachable);
      }
      case (String): {
        unreachable;
      }
    }
  }
  VerifiedUnreachable();
}

// Dispatch on the slice type of two different strings.
// Obtains a character slice for `s1` and then for `s2` via StringToSlice and
// invokes Call(f, slice1, slice2) with the matching combination of one-byte
// and two-byte slice types.
macro TwoStringsToSlices<Result: type, Functor: type>(
    s1: String, s2: String, f: Functor): Result {
  try {
    StringToSlice(s1) otherwise FirstIsOneByte, FirstIsTwoByte;
  } label FirstIsOneByte(firstChars: ConstSlice<char8>) {
    try {
      StringToSlice(s2) otherwise SecondIsOneByte, SecondIsTwoByte;
    } label SecondIsOneByte(secondChars: ConstSlice<char8>) {
      return Call(f, firstChars, secondChars);
    } label SecondIsTwoByte(secondChars: ConstSlice<char16>) {
      return Call(f, firstChars, secondChars);
    }
  } label FirstIsTwoByte(firstChars: ConstSlice<char16>) {
    try {
      StringToSlice(s2) otherwise SecondIsOneByte, SecondIsTwoByte;
    } label SecondIsOneByte(secondChars: ConstSlice<char8>) {
      return Call(f, firstChars, secondChars);
    } label SecondIsTwoByte(secondChars: ConstSlice<char16>) {
      return Call(f, firstChars, secondChars);
    }
  }
}

// Compile-time check that kStringMaxLengthUintptr is below kSmiMaxValue.
macro StaticAssertStringLengthFitsSmi(): void {
  const kStringLengthFitsInSmi: constexpr bool =
      kStringMaxLengthUintptr < kSmiMaxValue;
  static_assert(kStringLengthFitsInSmi);
}

// Search primitives provided by StringBuiltinsAssembler, one per combination
// of subject and search-string character width, plus a variant for searching
// a one-byte subject that takes no search length.
extern macro StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char16>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char16>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchOneByteInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr): intptr;

// The four overloads below select the search primitive that matches the
// character widths of `subject` and `search`.

// Two-byte subject, one-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchOneByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}

// One-byte subject, one-byte search string. A search string of length one is
// routed to the dedicated primitive that takes no search length.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  if (searchLen == 1) {
    return SearchOneByteInOneByteString(subject, subjectLen, search, fromIndex);
  } else {
    return SearchOneByteStringInOneByteString(
        subject, subjectLen, search, searchLen, fromIndex);
  }
}

// Two-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchTwoByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}

// One-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  const position: intptr = SearchTwoByteStringInOneByteString(
      subject, subjectLen, search, searchLen, fromIndex);
  return position;
}

// Functor handed to TwoStringsToSlices; carries the index at which the search
// starts.
struct AbstractStringIndexOfFunctor {
  fromIndex: Smi;
}
// Ideally, this would be a method of AbstractStringIndexOfFunctor, but
// currently methods don't support templates.
// Runs the pointer-based AbstractStringIndexOf overload matching the two
// slice element types and converts its result to a Smi.
macro Call<A: type, B: type>(
    self: AbstractStringIndexOfFunctor, string: ConstSlice<A>,
    searchStr: ConstSlice<B>): Smi {
  const startIndex: intptr = Convert<intptr>(self.fromIndex);
  const position: intptr = AbstractStringIndexOf(
      string.GCUnsafeStartPointer(), string.length,
      searchStr.GCUnsafeStartPointer(), searchStr.length, startIndex);
  return Convert<Smi>(position);
}

// Searches `string` for `searchString`, starting at `fromIndex`. Returns -1
// when the search string cannot fit between `fromIndex` and the end of
// `string`; otherwise returns the result of the slice-based search.
macro AbstractStringIndexOf(implicit context: Context)(
    string: String, searchString: String, fromIndex: Smi): Smi {
  const needleLength = searchString.length_intptr;
  const haystackLength = string.length_intptr;
  const startIndex: intptr = SmiUntag(fromIndex);

  // Special case the empty string: it is found at the start index itself, as
  // long as that index does not lie beyond the end of the string.
  if (needleLength == 0 && startIndex <= haystackLength) {
    return fromIndex;
  }

  // Don't bother to search if the searchString would go past the end
  // of the string. This is actually necessary because of runtime
  // checks.
  if (startIndex + needleLength > haystackLength) {
    return -1;
  }

  const functor = AbstractStringIndexOfFunctor{fromIndex: fromIndex};
  return TwoStringsToSlices<Smi>(string, searchString, functor);
}

// Builtin entry point: raises a negative `start` to 0 and delegates to
// AbstractStringIndexOf.
builtin StringIndexOf(implicit context: Context)(
    s: String, searchString: String, start: Smi): Smi {
  const clampedStart: Smi = SmiMax(start, 0);
  return AbstractStringIndexOf(s, searchString, clampedStart);
}