1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_STRINGS_STRING_BUILDER_INL_H_
6 #define V8_STRINGS_STRING_BUILDER_INL_H_
7
8 #include "src/common/assert-scope.h"
9 #include "src/execution/isolate.h"
10 #include "src/handles/handles-inl.h"
11 #include "src/heap/factory.h"
12 #include "src/objects/fixed-array.h"
13 #include "src/objects/objects.h"
14 #include "src/objects/string-inl.h"
15 #include "src/utils/utils.h"
16
17 namespace v8 {
18 namespace internal {
19
20 const int kStringBuilderConcatHelperLengthBits = 11;
21 const int kStringBuilderConcatHelperPositionBits = 19;
22
23 using StringBuilderSubstringLength =
24 base::BitField<int, 0, kStringBuilderConcatHelperLengthBits>;
25 using StringBuilderSubstringPosition =
26 base::BitField<int, kStringBuilderConcatHelperLengthBits,
27 kStringBuilderConcatHelperPositionBits>;
28
29 template <typename sinkchar>
30 void StringBuilderConcatHelper(String special, sinkchar* sink,
31 FixedArray fixed_array, int array_length);
32
33 // Returns the result length of the concatenation.
34 // On illegal argument, -1 is returned.
35 int StringBuilderConcatLength(int special_length, FixedArray fixed_array,
36 int array_length, bool* one_byte);
37
38 class FixedArrayBuilder {
39 public:
40 explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
41 explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42
43 bool HasCapacity(int elements);
44 void EnsureCapacity(Isolate* isolate, int elements);
45
46 void Add(Object value);
47 void Add(Smi value);
48
array()49 Handle<FixedArray> array() { return array_; }
50
length()51 int length() { return length_; }
52
53 int capacity();
54
55 Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56
57 private:
58 Handle<FixedArray> array_;
59 int length_;
60 bool has_non_smi_elements_;
61 };
62
63 class ReplacementStringBuilder {
64 public:
65 ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66 int estimated_part_count);
67
68 // Caution: Callers must ensure the builder has enough capacity.
AddSubjectSlice(FixedArrayBuilder * builder,int from,int to)69 static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
70 int to) {
71 DCHECK_GE(from, 0);
72 int length = to - from;
73 DCHECK_GT(length, 0);
74 if (StringBuilderSubstringLength::is_valid(length) &&
75 StringBuilderSubstringPosition::is_valid(from)) {
76 int encoded_slice = StringBuilderSubstringLength::encode(length) |
77 StringBuilderSubstringPosition::encode(from);
78 builder->Add(Smi::FromInt(encoded_slice));
79 } else {
80 // Otherwise encode as two smis.
81 builder->Add(Smi::FromInt(-length));
82 builder->Add(Smi::FromInt(from));
83 }
84 }
85
AddSubjectSlice(int from,int to)86 void AddSubjectSlice(int from, int to) {
87 EnsureCapacity(2); // Subject slices are encoded with up to two smis.
88 AddSubjectSlice(&array_builder_, from, to);
89 IncrementCharacterCount(to - from);
90 }
91
92 void AddString(Handle<String> string);
93
94 MaybeHandle<String> ToString();
95
IncrementCharacterCount(int by)96 void IncrementCharacterCount(int by) {
97 if (character_count_ > String::kMaxLength - by) {
98 STATIC_ASSERT(String::kMaxLength < kMaxInt);
99 character_count_ = kMaxInt;
100 } else {
101 character_count_ += by;
102 }
103 }
104
105 private:
106 void AddElement(Handle<Object> element);
107 void EnsureCapacity(int elements);
108
109 Heap* heap_;
110 FixedArrayBuilder array_builder_;
111 Handle<String> subject_;
112 int character_count_;
113 bool is_one_byte_;
114 };
115
116 class IncrementalStringBuilder {
117 public:
118 explicit IncrementalStringBuilder(Isolate* isolate);
119
CurrentEncoding()120 V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
121
122 template <typename SrcChar, typename DestChar>
123 V8_INLINE void Append(SrcChar c);
124
AppendCharacter(uint8_t c)125 V8_INLINE void AppendCharacter(uint8_t c) {
126 if (encoding_ == String::ONE_BYTE_ENCODING) {
127 Append<uint8_t, uint8_t>(c);
128 } else {
129 Append<uint8_t, base::uc16>(c);
130 }
131 }
132
133 template <int N>
AppendCStringLiteral(const char (& literal)[N])134 V8_INLINE void AppendCStringLiteral(const char (&literal)[N]) {
135 // Note that the literal contains the zero char.
136 const int length = N - 1;
137 STATIC_ASSERT(length > 0);
138 if (length == 1) return AppendCharacter(literal[0]);
139 if (encoding_ == String::ONE_BYTE_ENCODING && CurrentPartCanFit(N)) {
140 const uint8_t* chars = reinterpret_cast<const uint8_t*>(literal);
141 SeqOneByteString::cast(*current_part_)
142 .SeqOneByteStringSetChars(current_index_, chars, length);
143 current_index_ += length;
144 if (current_index_ == part_length_) Extend();
145 DCHECK(HasValidCurrentIndex());
146 return;
147 }
148 return AppendCString(literal);
149 }
150
AppendCString(const char * s)151 V8_INLINE void AppendCString(const char* s) {
152 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
153 if (encoding_ == String::ONE_BYTE_ENCODING) {
154 while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
155 } else {
156 while (*u != '\0') Append<uint8_t, base::uc16>(*(u++));
157 }
158 }
159
AppendCString(const base::uc16 * s)160 V8_INLINE void AppendCString(const base::uc16* s) {
161 if (encoding_ == String::ONE_BYTE_ENCODING) {
162 while (*s != '\0') Append<base::uc16, uint8_t>(*(s++));
163 } else {
164 while (*s != '\0') Append<base::uc16, base::uc16>(*(s++));
165 }
166 }
167
AppendInt(int i)168 V8_INLINE void AppendInt(int i) {
169 char buffer[kIntToCStringBufferSize];
170 const char* str =
171 IntToCString(i, base::Vector<char>(buffer, kIntToCStringBufferSize));
172 AppendCString(str);
173 }
174
CurrentPartCanFit(int length)175 V8_INLINE bool CurrentPartCanFit(int length) {
176 return part_length_ - current_index_ > length;
177 }
178
179 // We make a rough estimate to find out if the current string can be
180 // serialized without allocating a new string part. The worst case length of
181 // an escaped character is 6. Shifting the remaining string length right by 3
182 // is a more pessimistic estimate, but faster to calculate.
EscapedLengthIfCurrentPartFits(int length)183 V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
184 if (length > kMaxPartLength) return 0;
185 STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
186 // This shift will not overflow because length is already less than the
187 // maximum part length.
188 int worst_case_length = length << 3;
189 return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
190 }
191
192 void AppendString(Handle<String> string);
193
194 MaybeHandle<String> Finish();
195
HasOverflowed()196 V8_INLINE bool HasOverflowed() const { return overflowed_; }
197
198 int Length() const;
199
200 // Change encoding to two-byte.
ChangeEncoding()201 void ChangeEncoding() {
202 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
203 ShrinkCurrentPart();
204 encoding_ = String::TWO_BYTE_ENCODING;
205 Extend();
206 }
207
208 template <typename DestChar>
209 class NoExtend {
210 public:
NoExtend(String string,int offset,const DisallowGarbageCollection & no_gc)211 NoExtend(String string, int offset,
212 const DisallowGarbageCollection& no_gc) {
213 DCHECK(string.IsSeqOneByteString() || string.IsSeqTwoByteString());
214 if (sizeof(DestChar) == 1) {
215 start_ = reinterpret_cast<DestChar*>(
216 SeqOneByteString::cast(string).GetChars(no_gc) + offset);
217 } else {
218 start_ = reinterpret_cast<DestChar*>(
219 SeqTwoByteString::cast(string).GetChars(no_gc) + offset);
220 }
221 cursor_ = start_;
222 #ifdef DEBUG
223 string_ = string;
224 #endif
225 }
226
227 #ifdef DEBUG
~NoExtend()228 ~NoExtend() {
229 DestChar* end;
230 if (sizeof(DestChar) == 1) {
231 auto one_byte_string = SeqOneByteString::cast(string_);
232 end = reinterpret_cast<DestChar*>(one_byte_string.GetChars(no_gc_) +
233 one_byte_string.length());
234 } else {
235 auto two_byte_string = SeqTwoByteString::cast(string_);
236 end = reinterpret_cast<DestChar*>(two_byte_string.GetChars(no_gc_) +
237 two_byte_string.length());
238 }
239 DCHECK_LE(cursor_, end + 1);
240 }
241 #endif
242
Append(DestChar c)243 V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
AppendCString(const char * s)244 V8_INLINE void AppendCString(const char* s) {
245 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
246 while (*u != '\0') Append(*(u++));
247 }
248
written()249 int written() { return static_cast<int>(cursor_ - start_); }
250
251 private:
252 DestChar* start_;
253 DestChar* cursor_;
254 #ifdef DEBUG
255 String string_;
256 #endif
257 DISALLOW_GARBAGE_COLLECTION(no_gc_)
258 };
259
260 template <typename DestChar>
261 class NoExtendString : public NoExtend<DestChar> {
262 public:
NoExtendString(Handle<String> string,int required_length)263 NoExtendString(Handle<String> string, int required_length)
264 : NoExtend<DestChar>(string, 0), string_(string) {
265 DCHECK(string->length() >= required_length);
266 }
267
Finalize()268 Handle<String> Finalize() {
269 Handle<SeqString> string = Handle<SeqString>::cast(string_);
270 int length = NoExtend<DestChar>::written();
271 Handle<String> result = SeqString::Truncate(string, length);
272 string_ = Handle<String>();
273 return result;
274 }
275
276 private:
277 Handle<String> string_;
278 };
279
280 template <typename DestChar>
281 class NoExtendBuilder : public NoExtend<DestChar> {
282 public:
NoExtendBuilder(IncrementalStringBuilder * builder,int required_length,const DisallowGarbageCollection & no_gc)283 NoExtendBuilder(IncrementalStringBuilder* builder, int required_length,
284 const DisallowGarbageCollection& no_gc)
285 : NoExtend<DestChar>(*(builder->current_part()),
286 builder->current_index_, no_gc),
287 builder_(builder) {
288 DCHECK(builder->CurrentPartCanFit(required_length));
289 }
290
~NoExtendBuilder()291 ~NoExtendBuilder() {
292 builder_->current_index_ += NoExtend<DestChar>::written();
293 DCHECK(builder_->HasValidCurrentIndex());
294 }
295
296 private:
297 IncrementalStringBuilder* builder_;
298 };
299
isolate()300 Isolate* isolate() { return isolate_; }
301
302 private:
factory()303 Factory* factory() { return isolate_->factory(); }
304
accumulator()305 V8_INLINE Handle<String> accumulator() { return accumulator_; }
306
set_accumulator(Handle<String> string)307 V8_INLINE void set_accumulator(Handle<String> string) {
308 accumulator_.PatchValue(*string);
309 }
310
current_part()311 V8_INLINE Handle<String> current_part() { return current_part_; }
312
set_current_part(Handle<String> string)313 V8_INLINE void set_current_part(Handle<String> string) {
314 current_part_.PatchValue(*string);
315 }
316
317 // Add the current part to the accumulator.
318 void Accumulate(Handle<String> new_part);
319
320 // Finish the current part and allocate a new part.
321 void Extend();
322
323 bool HasValidCurrentIndex() const;
324
325 // Shrink current part to the right size.
ShrinkCurrentPart()326 void ShrinkCurrentPart() {
327 DCHECK(current_index_ < part_length_);
328 set_current_part(SeqString::Truncate(
329 Handle<SeqString>::cast(current_part()), current_index_));
330 }
331
332 void AppendStringByCopy(Handle<String> string);
333 bool CanAppendByCopy(Handle<String> string);
334
335 static const int kInitialPartLength = 32;
336 static const int kMaxPartLength = 16 * 1024;
337 static const int kPartLengthGrowthFactor = 2;
338 static const int kIntToCStringBufferSize = 100;
339
340 Isolate* isolate_;
341 String::Encoding encoding_;
342 bool overflowed_;
343 int part_length_;
344 int current_index_;
345 Handle<String> accumulator_;
346 Handle<String> current_part_;
347 };
348
349 template <typename SrcChar, typename DestChar>
Append(SrcChar c)350 void IncrementalStringBuilder::Append(SrcChar c) {
351 DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
352 if (sizeof(DestChar) == 1) {
353 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
354 SeqOneByteString::cast(*current_part_)
355 .SeqOneByteStringSet(current_index_++, c);
356 } else {
357 DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
358 SeqTwoByteString::cast(*current_part_)
359 .SeqTwoByteStringSet(current_index_++, c);
360 }
361 if (current_index_ == part_length_) Extend();
362 DCHECK(HasValidCurrentIndex());
363 }
364 } // namespace internal
365 } // namespace v8
366
367 #endif // V8_STRINGS_STRING_BUILDER_INL_H_
368