1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "string-alloc-inl.h"
18
19 #include "arch/memcmp16.h"
20 #include "array-alloc-inl.h"
21 #include "base/array_ref.h"
22 #include "base/casts.h"
23 #include "base/stl_util.h"
24 #include "class-inl.h"
25 #include "dex/descriptors_names.h"
26 #include "dex/utf-inl.h"
27 #include "gc/accounting/card_table-inl.h"
28 #include "handle_scope-inl.h"
29 #include "intern_table.h"
30 #include "object-inl.h"
31 #include "runtime.h"
32 #include "string-inl.h"
33 #include "thread.h"
34
35 namespace art {
36 namespace mirror {
37
FastIndexOf(int32_t ch,int32_t start)38 int32_t String::FastIndexOf(int32_t ch, int32_t start) {
39 int32_t count = GetLength();
40 if (start < 0) {
41 start = 0;
42 } else if (start > count) {
43 start = count;
44 }
45 if (IsCompressed()) {
46 return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start);
47 } else {
48 return FastIndexOf<uint16_t>(GetValue(), ch, start);
49 }
50 }
51
ComputeAndSetHashCode()52 int32_t String::ComputeAndSetHashCode() {
53 int32_t new_hash_code = ComputeHashCode();
54 SetHashCode(new_hash_code);
55 return new_hash_code;
56 }
57
AllASCIIExcept(const uint16_t * chars,int32_t length,uint16_t non_ascii)58 inline bool String::AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii) {
59 DCHECK(!IsASCII(non_ascii));
60 for (int32_t i = 0; i < length; ++i) {
61 if (!IsASCII(chars[i]) && chars[i] != non_ascii) {
62 return false;
63 }
64 }
65 return true;
66 }
67
DoReplace(Thread * self,Handle<String> src,uint16_t old_c,uint16_t new_c)68 ObjPtr<String> String::DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c) {
69 int32_t length = src->GetLength();
70 DCHECK(src->IsCompressed()
71 ? ContainsElement(ArrayRef<uint8_t>(src->value_compressed_, length), old_c)
72 : ContainsElement(ArrayRef<uint16_t>(src->value_, length), old_c));
73 bool compressible =
74 kUseStringCompression &&
75 IsASCII(new_c) &&
76 (src->IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(src->value_, length, old_c)));
77 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
78 const int32_t length_with_flag = String::GetFlaggedCount(length, compressible);
79
80 auto visitor = [=](ObjPtr<Object> obj, size_t usable_size) REQUIRES_SHARED(Locks::mutator_lock_) {
81 SetStringCountVisitor set_string_count_visitor(length_with_flag);
82 set_string_count_visitor(obj, usable_size);
83 ObjPtr<String> new_string = obj->AsString();
84 if (compressible) {
85 auto replace = [old_c, new_c](uint16_t c) {
86 return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
87 };
88 uint8_t* out = new_string->value_compressed_;
89 if (LIKELY(src->IsCompressed())) { // LIKELY(compressible == src->IsCompressed())
90 std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
91 } else {
92 std::transform(src->value_, src->value_ + length, out, replace);
93 }
94 DCHECK(kUseStringCompression && AllASCII(out, length));
95 } else {
96 auto replace = [old_c, new_c](uint16_t c) {
97 return (old_c != c) ? c : new_c;
98 };
99 uint16_t* out = new_string->value_;
100 if (UNLIKELY(src->IsCompressed())) { // LIKELY(compressible == src->IsCompressed())
101 std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
102 } else {
103 std::transform(src->value_, src->value_ + length, out, replace);
104 }
105 DCHECK_IMPLIES(kUseStringCompression, !AllASCII(out, length));
106 }
107 };
108 return Alloc(self, length_with_flag, allocator_type, visitor);
109 }
110
DoConcat(Thread * self,Handle<String> h_this,Handle<String> h_arg)111 ObjPtr<String> String::DoConcat(Thread* self, Handle<String> h_this, Handle<String> h_arg) {
112 int32_t length_this = h_this->GetLength();
113 int32_t length_arg = h_arg->GetLength();
114 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
115 const bool compressible =
116 kUseStringCompression && (h_this->IsCompressed() && h_arg->IsCompressed());
117 const int32_t length_with_flag = String::GetFlaggedCount(length_this + length_arg, compressible);
118
119 auto visitor = [=](ObjPtr<Object> obj, size_t usable_size) REQUIRES_SHARED(Locks::mutator_lock_) {
120 SetStringCountVisitor set_string_count_visitor(length_with_flag);
121 set_string_count_visitor(obj, usable_size);
122 ObjPtr<String> new_string = obj->AsString();
123 if (compressible) {
124 uint8_t* new_value = new_string->GetValueCompressed();
125 memcpy(new_value, h_this->GetValueCompressed(), length_this * sizeof(uint8_t));
126 memcpy(new_value + length_this, h_arg->GetValueCompressed(), length_arg * sizeof(uint8_t));
127 } else {
128 uint16_t* new_value = new_string->GetValue();
129 if (h_this->IsCompressed()) {
130 const uint8_t* value_this = h_this->GetValueCompressed();
131 for (int i = 0; i < length_this; ++i) {
132 new_value[i] = value_this[i];
133 }
134 } else {
135 memcpy(new_value, h_this->GetValue(), length_this * sizeof(uint16_t));
136 }
137 if (h_arg->IsCompressed()) {
138 const uint8_t* value_arg = h_arg->GetValueCompressed();
139 for (int i = 0; i < length_arg; ++i) {
140 new_value[i + length_this] = value_arg[i];
141 }
142 } else {
143 memcpy(new_value + length_this, h_arg->GetValue(), length_arg * sizeof(uint16_t));
144 }
145 }
146 };
147 return Alloc(self, length_with_flag, allocator_type, visitor);
148 }
149
150 template<typename T>
RepeatCharacters(ObjPtr<String> new_string,Handle<String> h_this,int32_t count)151 static void RepeatCharacters(ObjPtr<String> new_string, Handle<String> h_this, int32_t count)
152 REQUIRES_SHARED(Locks::mutator_lock_) {
153 T *new_value, *h_this_value;
154 if constexpr (std::is_same_v<T, uint8_t>) {
155 new_value = new_string->GetValueCompressed();
156 h_this_value = h_this->GetValueCompressed();
157 } else {
158 new_value = new_string->GetValue();
159 h_this_value = h_this->GetValue();
160 }
161 int32_t length_this = h_this->GetLength();
162 if (length_this == 1) {
163 // compiler is smart enough to use memset for uint8_t
164 std::fill(new_value, new_value + count, h_this_value[0]);
165 } else {
166 memcpy(new_value, h_this_value, length_this * sizeof(T));
167 int32_t copied = length_this;
168 int32_t limit = length_this * count;
169 for (; copied < limit - copied; copied <<= 1) {
170 memcpy(new_value + copied, new_value, copied * sizeof(T));
171 }
172 memcpy(new_value + copied, new_value, (limit - copied) * sizeof(T));
173 }
174 }
175
DoRepeat(Thread * self,Handle<String> h_this,int32_t count)176 ObjPtr<String> String::DoRepeat(Thread* self, Handle<String> h_this, int32_t count) {
177 int32_t length_this = h_this->GetLength();
178 DCHECK_GT(count, 1);
179 DCHECK_LE(length_this, std::numeric_limits<int32_t>::max() / count);
180 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
181 const bool compressible = kUseStringCompression && (h_this->IsCompressed());
182 const int32_t length_with_flag = String::GetFlaggedCount(length_this * count, compressible);
183
184 auto visitor = [=](ObjPtr<Object> obj, size_t usable_size) REQUIRES_SHARED(Locks::mutator_lock_) {
185 SetStringCountVisitor set_string_count_visitor(length_with_flag);
186 set_string_count_visitor(obj, usable_size);
187 ObjPtr<String> new_string = obj->AsString();
188
189 if (compressible) {
190 RepeatCharacters<uint8_t>(new_string, h_this, count);
191 } else {
192 RepeatCharacters<uint16_t>(new_string, h_this, count);
193 }
194 };
195 return Alloc(self, length_with_flag, allocator_type, visitor);
196 }
197
AllocFromUtf16(Thread * self,int32_t utf16_length,const uint16_t * utf16_data_in)198 ObjPtr<String> String::AllocFromUtf16(Thread* self,
199 int32_t utf16_length,
200 const uint16_t* utf16_data_in) {
201 CHECK_IMPLIES(utf16_data_in == nullptr, utf16_length == 0);
202 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
203 const bool compressible = kUseStringCompression &&
204 String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
205 int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
206
207 auto visitor = [=](ObjPtr<Object> obj, size_t usable_size) REQUIRES_SHARED(Locks::mutator_lock_) {
208 SetStringCountVisitor set_string_count_visitor(length_with_flag);
209 set_string_count_visitor(obj, usable_size);
210 ObjPtr<String> new_string = obj->AsString();
211 if (compressible) {
212 uint8_t* value = new_string->GetValueCompressed();
213 for (int i = 0; i < utf16_length; ++i) {
214 value[i] = static_cast<uint8_t>(utf16_data_in[i]);
215 }
216 } else {
217 memcpy(new_string->GetValue(), utf16_data_in, utf16_length * sizeof(uint16_t));
218 }
219 };
220 return Alloc(self, length_with_flag, allocator_type, visitor);
221 }
222
AllocFromModifiedUtf8(Thread * self,const char * utf)223 ObjPtr<String> String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
224 DCHECK(utf != nullptr);
225 size_t byte_count = strlen(utf);
226 size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
227 return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
228 }
229
AllocFromModifiedUtf8(Thread * self,int32_t utf16_length,const char * utf8_data_in)230 ObjPtr<String> String::AllocFromModifiedUtf8(Thread* self,
231 int32_t utf16_length,
232 const char* utf8_data_in) {
233 return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
234 }
235
AllocFromModifiedUtf8(Thread * self,int32_t utf16_length,const char * utf8_data_in,int32_t utf8_length)236 ObjPtr<String> String::AllocFromModifiedUtf8(Thread* self,
237 int32_t utf16_length,
238 const char* utf8_data_in,
239 int32_t utf8_length) {
240 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
241 const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
242 const int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
243
244 auto visitor = [=](ObjPtr<Object> obj, size_t usable_size) REQUIRES_SHARED(Locks::mutator_lock_) {
245 SetStringCountVisitor set_string_count_visitor(length_with_flag);
246 set_string_count_visitor(obj, usable_size);
247 ObjPtr<String> new_string = obj->AsString();
248 if (compressible) {
249 memcpy(new_string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t));
250 } else {
251 uint16_t* utf16_data_out = new_string->GetValue();
252 ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
253 }
254 };
255 return Alloc(self, length_with_flag, allocator_type, visitor);
256 }
257
Equals(mirror::String * that)258 bool String::Equals(mirror::String* that) {
259 if (this == that) {
260 // Quick reference equality test
261 return true;
262 } else if (that == nullptr) {
263 // Null isn't an instanceof anything
264 return false;
265 } else if (this->GetCount() != that->GetCount()) {
266 // Quick length and compression inequality test
267 return false;
268 } else {
269 // Note: don't short circuit on hash code as we're presumably here as the
270 // hash code was already equal
271 if (this->IsCompressed()) {
272 return memcmp(this->GetValueCompressed(), that->GetValueCompressed(), this->GetLength()) == 0;
273 } else {
274 return memcmp(this->GetValue(), that->GetValue(), sizeof(uint16_t) * this->GetLength()) == 0;
275 }
276 }
277 }
278
Equals(const char * modified_utf8)279 bool String::Equals(const char* modified_utf8) {
280 const int32_t length = GetLength();
281 if (IsCompressed()) {
282 return strlen(modified_utf8) == dchecked_integral_cast<uint32_t>(length) &&
283 memcmp(modified_utf8, GetValueCompressed(), length) == 0;
284 }
285 const uint16_t* value = GetValue();
286 int32_t i = 0;
287 while (i < length) {
288 const uint32_t ch = GetUtf16FromUtf8(&modified_utf8);
289 if (ch == '\0') {
290 return false;
291 }
292
293 if (GetLeadingUtf16Char(ch) != value[i++]) {
294 return false;
295 }
296
297 const uint16_t trailing = GetTrailingUtf16Char(ch);
298 if (trailing != 0) {
299 if (i == length) {
300 return false;
301 }
302
303 if (value[i++] != trailing) {
304 return false;
305 }
306 }
307 }
308 return *modified_utf8 == '\0';
309 }
310
311 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
ToModifiedUtf8()312 std::string String::ToModifiedUtf8() {
313 if (IsCompressed()) {
314 return std::string(reinterpret_cast<const char*>(GetValueCompressed()), GetLength());
315 } else {
316 size_t byte_count = GetModifiedUtf8Length();
317 std::string result(byte_count, static_cast<char>(0));
318 ConvertUtf16ToModifiedUtf8(&result[0], byte_count, GetValue(), GetLength());
319 return result;
320 }
321 }
322
CompareTo(ObjPtr<String> rhs)323 int32_t String::CompareTo(ObjPtr<String> rhs) {
324 // Quick test for comparison of a string with itself.
325 ObjPtr<String> lhs = this;
326 if (lhs == rhs) {
327 return 0;
328 }
329 int32_t lhs_count = lhs->GetLength();
330 int32_t rhs_count = rhs->GetLength();
331 int32_t count_diff = lhs_count - rhs_count;
332 int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
333 if (lhs->IsCompressed() && rhs->IsCompressed()) {
334 const uint8_t* lhs_chars = lhs->GetValueCompressed();
335 const uint8_t* rhs_chars = rhs->GetValueCompressed();
336 for (int32_t i = 0; i < min_count; ++i) {
337 int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
338 if (char_diff != 0) {
339 return char_diff;
340 }
341 }
342 } else if (lhs->IsCompressed() || rhs->IsCompressed()) {
343 const uint8_t* compressed_chars =
344 lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
345 const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
346 for (int32_t i = 0; i < min_count; ++i) {
347 int32_t char_diff =
348 static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
349 if (char_diff != 0) {
350 return lhs->IsCompressed() ? char_diff : -char_diff;
351 }
352 }
353 } else {
354 const uint16_t* lhs_chars = lhs->GetValue();
355 const uint16_t* rhs_chars = rhs->GetValue();
356 // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
357 // where memcmp() only guarantees that the returned value has the same sign.
358 int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
359 if (char_diff != 0) {
360 return char_diff;
361 }
362 }
363 return count_diff;
364 }
365
ToCharArray(Handle<String> h_this,Thread * self)366 ObjPtr<CharArray> String::ToCharArray(Handle<String> h_this, Thread* self) {
367 ObjPtr<CharArray> result = CharArray::Alloc(self, h_this->GetLength());
368 if (result != nullptr) {
369 if (h_this->IsCompressed()) {
370 int32_t length = h_this->GetLength();
371 const uint8_t* src = h_this->GetValueCompressed();
372 uint16_t* dest = result->GetData();
373 for (int i = 0; i < length; ++i) {
374 dest[i] = src[i];
375 }
376 } else {
377 memcpy(result->GetData(), h_this->GetValue(), h_this->GetLength() * sizeof(uint16_t));
378 }
379 } else {
380 self->AssertPendingOOMException();
381 }
382 return result;
383 }
384
GetChars(int32_t start,int32_t end,Handle<CharArray> array,int32_t index)385 void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
386 uint16_t* data = array->GetData() + index;
387 DCHECK_LE(start, end);
388 int32_t length = end - start;
389 if (IsCompressed()) {
390 const uint8_t* value = GetValueCompressed() + start;
391 for (int i = 0; i < length; ++i) {
392 data[i] = value[i];
393 }
394 } else {
395 uint16_t* value = GetValue() + start;
396 memcpy(data, value, length * sizeof(uint16_t));
397 }
398 }
399
FillBytesLatin1(Handle<ByteArray> array,int32_t index)400 void String::FillBytesLatin1(Handle<ByteArray> array, int32_t index) {
401 int8_t* data = array->GetData() + index;
402 int32_t length = GetLength();
403 if (IsCompressed()) {
404 const uint8_t* value = GetValueCompressed();
405 memcpy(data, value, length * sizeof(uint8_t));
406 } else {
407 // Drop the high byte of the characters.
408 // The caller should check that all dropped high bytes are zeros.
409 const uint16_t* value = GetValue();
410 for (int32_t i = 0; i < length; ++i) {
411 data[i] = static_cast<int8_t>(dchecked_integral_cast<uint8_t>(value[i]));
412 }
413 }
414 }
415
FillBytesUTF16(Handle<ByteArray> array,int32_t index)416 void String::FillBytesUTF16(Handle<ByteArray> array, int32_t index) {
417 int8_t* data = array->GetData() + index;
418 int32_t length = GetLength();
419 if (IsCompressed()) {
420 const uint8_t* value = GetValueCompressed();
421 uint32_t d_index = 0;
422 for (int i = 0; i < length; ++i) {
423 data[d_index++] = static_cast<int8_t>(value[i]);
424 data[d_index++] = 0;
425 }
426 } else {
427 const uint16_t* value = GetValue();
428 memcpy(data, value, length * sizeof(uint16_t));
429 }
430 }
431
IsValueNull()432 bool String::IsValueNull() {
433 return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr);
434 }
435
PrettyStringDescriptor(ObjPtr<mirror::String> java_descriptor)436 std::string String::PrettyStringDescriptor(ObjPtr<mirror::String> java_descriptor) {
437 if (java_descriptor == nullptr) {
438 return "null";
439 }
440 return java_descriptor->PrettyStringDescriptor();
441 }
442
PrettyStringDescriptor()443 std::string String::PrettyStringDescriptor() {
444 return PrettyDescriptor(ToModifiedUtf8().c_str());
445 }
446
Intern()447 ObjPtr<String> String::Intern() {
448 return Runtime::Current()->GetInternTable()->InternWeak(this);
449 }
450
451 } // namespace mirror
452 } // namespace art
453