1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_STRING_BUILDER_INL_H_
6 #define V8_STRING_BUILDER_INL_H_
7
8 #include "src/assert-scope.h"
9 #include "src/handles-inl.h"
10 #include "src/heap/factory.h"
11 #include "src/isolate.h"
12 #include "src/objects.h"
13 #include "src/objects/fixed-array.h"
14 #include "src/objects/string-inl.h"
15 #include "src/utils.h"
16
17 namespace v8 {
18 namespace internal {
19
20 const int kStringBuilderConcatHelperLengthBits = 11;
21 const int kStringBuilderConcatHelperPositionBits = 19;
22
23 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
24 StringBuilderSubstringLength;
25 typedef BitField<int, kStringBuilderConcatHelperLengthBits,
26 kStringBuilderConcatHelperPositionBits>
27 StringBuilderSubstringPosition;
28
29 template <typename sinkchar>
30 void StringBuilderConcatHelper(String* special, sinkchar* sink,
31 FixedArray* fixed_array, int array_length);
32
33 // Returns the result length of the concatenation.
34 // On illegal argument, -1 is returned.
35 int StringBuilderConcatLength(int special_length, FixedArray* fixed_array,
36 int array_length, bool* one_byte);
37
38 class FixedArrayBuilder {
39 public:
40 explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
41 explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42
43 bool HasCapacity(int elements);
44 void EnsureCapacity(Isolate* isolate, int elements);
45
46 void Add(Object* value);
47 void Add(Smi* value);
48
array()49 Handle<FixedArray> array() { return array_; }
50
length()51 int length() { return length_; }
52
53 int capacity();
54
55 Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56
57 private:
58 Handle<FixedArray> array_;
59 int length_;
60 bool has_non_smi_elements_;
61 };
62
63 class ReplacementStringBuilder {
64 public:
65 ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66 int estimated_part_count);
67
AddSubjectSlice(FixedArrayBuilder * builder,int from,int to)68 static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
69 int to) {
70 DCHECK_GE(from, 0);
71 int length = to - from;
72 DCHECK_GT(length, 0);
73 if (StringBuilderSubstringLength::is_valid(length) &&
74 StringBuilderSubstringPosition::is_valid(from)) {
75 int encoded_slice = StringBuilderSubstringLength::encode(length) |
76 StringBuilderSubstringPosition::encode(from);
77 builder->Add(Smi::FromInt(encoded_slice));
78 } else {
79 // Otherwise encode as two smis.
80 builder->Add(Smi::FromInt(-length));
81 builder->Add(Smi::FromInt(from));
82 }
83 }
84
85 void EnsureCapacity(int elements);
86
AddSubjectSlice(int from,int to)87 void AddSubjectSlice(int from, int to) {
88 AddSubjectSlice(&array_builder_, from, to);
89 IncrementCharacterCount(to - from);
90 }
91
92 void AddString(Handle<String> string);
93
94 MaybeHandle<String> ToString();
95
IncrementCharacterCount(int by)96 void IncrementCharacterCount(int by) {
97 if (character_count_ > String::kMaxLength - by) {
98 STATIC_ASSERT(String::kMaxLength < kMaxInt);
99 character_count_ = kMaxInt;
100 } else {
101 character_count_ += by;
102 }
103 }
104
105 private:
106 void AddElement(Object* element);
107
108 Heap* heap_;
109 FixedArrayBuilder array_builder_;
110 Handle<String> subject_;
111 int character_count_;
112 bool is_one_byte_;
113 };
114
115 class IncrementalStringBuilder {
116 public:
117 explicit IncrementalStringBuilder(Isolate* isolate);
118
CurrentEncoding()119 V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
120
121 template <typename SrcChar, typename DestChar>
122 V8_INLINE void Append(SrcChar c);
123
AppendCharacter(uint8_t c)124 V8_INLINE void AppendCharacter(uint8_t c) {
125 if (encoding_ == String::ONE_BYTE_ENCODING) {
126 Append<uint8_t, uint8_t>(c);
127 } else {
128 Append<uint8_t, uc16>(c);
129 }
130 }
131
AppendCString(const char * s)132 V8_INLINE void AppendCString(const char* s) {
133 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
134 if (encoding_ == String::ONE_BYTE_ENCODING) {
135 while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
136 } else {
137 while (*u != '\0') Append<uint8_t, uc16>(*(u++));
138 }
139 }
140
AppendCString(const uc16 * s)141 V8_INLINE void AppendCString(const uc16* s) {
142 if (encoding_ == String::ONE_BYTE_ENCODING) {
143 while (*s != '\0') Append<uc16, uint8_t>(*(s++));
144 } else {
145 while (*s != '\0') Append<uc16, uc16>(*(s++));
146 }
147 }
148
CurrentPartCanFit(int length)149 V8_INLINE bool CurrentPartCanFit(int length) {
150 return part_length_ - current_index_ > length;
151 }
152
153 // We make a rough estimate to find out if the current string can be
154 // serialized without allocating a new string part. The worst case length of
155 // an escaped character is 6. Shifting the remaining string length right by 3
156 // is a more pessimistic estimate, but faster to calculate.
EscapedLengthIfCurrentPartFits(int length)157 V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
158 if (length > kMaxPartLength) return 0;
159 STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
160 // This shift will not overflow because length is already less than the
161 // maximum part length.
162 int worst_case_length = length << 3;
163 return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
164 }
165
166 void AppendString(Handle<String> string);
167
168 MaybeHandle<String> Finish();
169
HasOverflowed()170 V8_INLINE bool HasOverflowed() const { return overflowed_; }
171
172 int Length() const;
173
174 // Change encoding to two-byte.
ChangeEncoding()175 void ChangeEncoding() {
176 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
177 ShrinkCurrentPart();
178 encoding_ = String::TWO_BYTE_ENCODING;
179 Extend();
180 }
181
182 template <typename DestChar>
183 class NoExtend {
184 public:
NoExtend(Handle<String> string,int offset)185 explicit NoExtend(Handle<String> string, int offset) {
186 DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
187 if (sizeof(DestChar) == 1) {
188 start_ = reinterpret_cast<DestChar*>(
189 Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
190 } else {
191 start_ = reinterpret_cast<DestChar*>(
192 Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
193 }
194 cursor_ = start_;
195 }
196
Append(DestChar c)197 V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
AppendCString(const char * s)198 V8_INLINE void AppendCString(const char* s) {
199 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
200 while (*u != '\0') Append(*(u++));
201 }
202
written()203 int written() { return static_cast<int>(cursor_ - start_); }
204
205 private:
206 DestChar* start_;
207 DestChar* cursor_;
208 DisallowHeapAllocation no_gc_;
209 };
210
211 template <typename DestChar>
212 class NoExtendString : public NoExtend<DestChar> {
213 public:
NoExtendString(Handle<String> string,int required_length)214 NoExtendString(Handle<String> string, int required_length)
215 : NoExtend<DestChar>(string, 0), string_(string) {
216 DCHECK(string->length() >= required_length);
217 }
218
Finalize()219 Handle<String> Finalize() {
220 Handle<SeqString> string = Handle<SeqString>::cast(string_);
221 int length = NoExtend<DestChar>::written();
222 Handle<String> result = SeqString::Truncate(string, length);
223 string_ = Handle<String>();
224 return result;
225 }
226
227 private:
228 Handle<String> string_;
229 };
230
231 template <typename DestChar>
232 class NoExtendBuilder : public NoExtend<DestChar> {
233 public:
NoExtendBuilder(IncrementalStringBuilder * builder,int required_length)234 NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
235 : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
236 builder_(builder) {
237 DCHECK(builder->CurrentPartCanFit(required_length));
238 }
239
~NoExtendBuilder()240 ~NoExtendBuilder() {
241 builder_->current_index_ += NoExtend<DestChar>::written();
242 }
243
244 private:
245 IncrementalStringBuilder* builder_;
246 };
247
248 private:
factory()249 Factory* factory() { return isolate_->factory(); }
250
accumulator()251 V8_INLINE Handle<String> accumulator() { return accumulator_; }
252
set_accumulator(Handle<String> string)253 V8_INLINE void set_accumulator(Handle<String> string) {
254 *accumulator_.location() = *string;
255 }
256
current_part()257 V8_INLINE Handle<String> current_part() { return current_part_; }
258
set_current_part(Handle<String> string)259 V8_INLINE void set_current_part(Handle<String> string) {
260 *current_part_.location() = *string;
261 }
262
263 // Add the current part to the accumulator.
264 void Accumulate(Handle<String> new_part);
265
266 // Finish the current part and allocate a new part.
267 void Extend();
268
269 // Shrink current part to the right size.
ShrinkCurrentPart()270 void ShrinkCurrentPart() {
271 DCHECK(current_index_ < part_length_);
272 set_current_part(SeqString::Truncate(
273 Handle<SeqString>::cast(current_part()), current_index_));
274 }
275
276 static const int kInitialPartLength = 32;
277 static const int kMaxPartLength = 16 * 1024;
278 static const int kPartLengthGrowthFactor = 2;
279
280 Isolate* isolate_;
281 String::Encoding encoding_;
282 bool overflowed_;
283 int part_length_;
284 int current_index_;
285 Handle<String> accumulator_;
286 Handle<String> current_part_;
287 };
288
289 template <typename SrcChar, typename DestChar>
Append(SrcChar c)290 void IncrementalStringBuilder::Append(SrcChar c) {
291 DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
292 if (sizeof(DestChar) == 1) {
293 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
294 SeqOneByteString::cast(*current_part_)
295 ->SeqOneByteStringSet(current_index_++, c);
296 } else {
297 DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
298 SeqTwoByteString::cast(*current_part_)
299 ->SeqTwoByteStringSet(current_index_++, c);
300 }
301 if (current_index_ == part_length_) Extend();
302 }
303 } // namespace internal
304 } // namespace v8
305
306 #endif // V8_STRING_BUILDER_INL_H_
307