1 /**
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <ctime>
17
18 #include "gtest/gtest.h"
19 #include "libpandabase/utils/span.h"
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/utils.h"
22 #include "runtime/include/class_linker_extension.h"
23 #include "runtime/include/coretypes/array-inl.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/include/thread.h"
27
28 // NOLINTBEGIN(readability-magic-numbers)
29
30 namespace ark::coretypes::test {
31
32 class StringTest : public testing::Test {
33 public:
StringTest()34 StringTest()
35 {
36 #ifdef PANDA_NIGHTLY_TEST_ON
37 seed_ = std::time(NULL);
38 #else
39 seed_ = 0xDEADBEEF;
40 #endif
41 srand(seed_);
42 // We need to create a runtime instance to be able to create strings.
43 options_.SetShouldLoadBootPandaFiles(false);
44 options_.SetShouldInitializeIntrinsics(false);
45 Runtime::Create(options_);
46 }
47
~StringTest()48 ~StringTest() override
49 {
50 Runtime::Destroy();
51 }
52
53 NO_COPY_SEMANTIC(StringTest);
54 NO_MOVE_SEMANTIC(StringTest);
55
GetLanguageContext()56 LanguageContext GetLanguageContext()
57 {
58 return Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
59 }
60
SetUp()61 void SetUp() override
62 {
63 thread_ = ark::MTManagedThread::GetCurrent();
64 thread_->ManagedCodeBegin();
65 }
66
TearDown()67 void TearDown() override
68 {
69 thread_->ManagedCodeEnd();
70 }
71
72 protected:
73 static constexpr uint32_t SIMPLE_UTF8_STRING_LENGTH = 13;
74 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
75 static constexpr char SIMPLE_UTF8_STRING[SIMPLE_UTF8_STRING_LENGTH + 1] = "Hello, world!";
76
77 private:
78 ark::MTManagedThread *thread_ {};
79 unsigned seed_ {};
80 RuntimeOptions options_;
81 };
82
// A String built from ASCII-range MUTF-8 bytes must compare equal to those same raw bytes.
TEST_F(StringTest, EqualStringWithCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0x01, 0x05, 0x07, 0x00};
    // The trailing zero byte is not counted as a character.
    const uint32_t charCount = rawBytes.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
91
// A String whose data starts with the two-byte sequence 0xc2 0xa7 must still compare equal
// to the raw MUTF-8 bytes it was created from.
TEST_F(StringTest, EqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> encoded {0xc2, 0xa7};

    // Append the 20 consecutive bytes 0x30 .. 0x43.
    uint8_t nextByte = 0x30;
    while (encoded.size() < 22U) {
        encoded.push_back(nextByte++);
    }
    encoded.push_back(0);

    // Neither the terminator nor the second byte of the two-byte sequence adds a character.
    const uint32_t charCount = encoded.size() - 2U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(encoded.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, encoded.data(), charCount));
}
106
// Two MUTF-8 sequences that differ only in their last character must not compare equal.
TEST_F(StringTest, NotEqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> sourceBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> otherBytes {0xc2, 0xa7, 0x34, 0x00};
    const uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(sourceBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, otherBytes.data(), charCount));
}
116
// A String created from data containing a two-byte sequence must differ from ASCII-only raw
// data of another length.
TEST_F(StringTest, NotEqualStringNotCompressedStringWithCompressedRawData)
{
    std::vector<uint8_t> wideBytes {0xc2, 0xa7, 0x33, 0x00};
    const uint32_t wideCharCount = 2;
    std::vector<uint8_t> asciiBytes {0x02, 0x07, 0x04, 0x00};
    const uint32_t asciiCharCount = 3;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(wideBytes.data(), wideCharCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, asciiBytes.data(), asciiCharCount));
}
127
// Mirror of the previous case: the String is created from ASCII-only data and compared
// against raw data that contains a two-byte sequence.
TEST_F(StringTest, NotEqualCompressedStringWithUncompressedRawUtf8Data)
{
    std::vector<uint8_t> asciiBytes {0x02, 0x07, 0x04, 0x00};
    const uint32_t asciiCharCount = 3;
    std::vector<uint8_t> wideBytes {0xc2, 0xa7, 0x33, 0x00};
    const uint32_t wideCharCount = 2;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(asciiBytes.data(), asciiCharCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, wideBytes.data(), wideCharCount));
}
138
// Raw MUTF-8 data that is one character shorter than the String must not compare equal to it.
TEST_F(StringTest, EqualStringWithMUtf8DifferentLength)
{
    std::vector<uint8_t> longerBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> shorterBytes {0xc2, 0xa7, 0x00};
    const uint32_t longerCharCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(longerBytes.data(), longerCharCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, shorterBytes.data(), longerCharCount - 1));
}
148
// A String created from UTF-16 code units must compare equal to the same raw code units.
TEST_F(StringTest, EqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The whole vector, including the trailing zero unit, is used as string content.
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    const uint16_t *rawUnits = units.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, units.size()));
}
157
// A String created from UTF-16 units that all lie in 1..30 must compare equal to the raw units.
TEST_F(StringTest, CompareCompressedStringWithRawUtf16)
{
    std::vector<uint16_t> units;

    for (uint16_t value = 1; value <= 30U; ++value) {
        units.push_back(value);
    }
    units.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero unit is left out of both the created string and the comparison.
    auto *created = String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    const uint16_t *rawUnits = units.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, units.size() - 1));
}
172
// Raw UTF-16 data that is one unit longer than the String must not compare equal to it.
TEST_F(StringTest, EqualStringWithRawUtf16DifferentLength)
{
    std::vector<uint16_t> shorterUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> longerUnits {0xffc3, 0x33, 0x55, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(shorterUnits.data(), shorterUnits.size(), GetLanguageContext(), vm);
    const uint16_t *rawUnits = longerUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUnits, longerUnits.size()));
}
182
// Same-length UTF-16 data that differs in one code unit must not compare equal to the String.
TEST_F(StringTest, NotEqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> sourceUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> otherUnits {0xffc3, 0x34, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(sourceUnits.data(), sourceUnits.size(), GetLanguageContext(), vm);

    const uint16_t *rawUnits = otherUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUnits, otherUnits.size()));
}
193
// The hash code stored by a String made from SIMPLE_UTF8_STRING must match the hash computed
// directly from the raw MUTF-8 bytes.
TEST_F(StringTest, compressedHashCodeUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);

    auto hashFromString = created->GetHashcode();
    auto hashFromBytes = String::ComputeHashcodeMutf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH);

    ASSERT_EQ(hashFromString, hashFromBytes);
}
// Same hash-code check as above, for data that begins with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, notCompressedHashCodeUtf8)
{
    std::vector<uint8_t> encoded {0xc2, 0xa7};

    // Append the 20 consecutive bytes 0x30 .. 0x43.
    uint8_t nextByte = 0x30;
    while (encoded.size() < 22U) {
        encoded.push_back(nextByte++);
    }
    // The leading two-byte sequence counts as a single character: 1 + 20 = 21.
    const size_t charCount = encoded.size() - 1;
    encoded.push_back(0);

    const uint8_t *rawBytes = encoded.data();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, charCount, GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromBytes = String::ComputeHashcodeMutf8(rawBytes, charCount);

    ASSERT_EQ(hashFromString, hashFromBytes);
}
223
// The hash code of a String built from the UTF-16 units 1..30 plus a zero unit must match the
// hash computed directly from those units.
TEST_F(StringTest, compressedHashCodeUtf16)
{
    std::vector<uint16_t> units;

    for (uint16_t value = 1; value <= 30U; ++value) {
        units.push_back(value);
    }
    units.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero unit is part of the string content here.
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromUnits = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hashFromString, hashFromUnits);
}
240
// Hash-code check for UTF-16 data containing the unit 0xffc3.
TEST_F(StringTest, notCompressedHashCodeUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromUnits = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hashFromString, hashFromUnits);
}
250
// GetLength() must report the character count the String was created with.
TEST_F(StringTest, lengthUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), SIMPLE_UTF8_STRING_LENGTH);
}
258
// GetLength() of a String created from UTF-16 data must equal the number of units passed in.
TEST_F(StringTest, lengthUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), units.size());
}
266
// A String and a String made from a proper prefix of the same text must not be equal.
TEST_F(StringTest, DifferentLengthStringCompareTest)
{
    static constexpr uint32_t F_STRING_LENGTH = 8;
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr char F_STRING[F_STRING_LENGTH + 1] = "Hello, w";

    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const auto *fullBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    String *fullString = String::CreateFromMUtf8(fullBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(fullString->GetLength(), SIMPLE_UTF8_STRING_LENGTH);

    const auto *prefixBytes = reinterpret_cast<const uint8_t *>(F_STRING);
    String *prefixString = String::CreateFromMUtf8(prefixBytes, F_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(prefixString->GetLength(), F_STRING_LENGTH);

    ASSERT_EQ(String::StringsAreEqual(fullString, prefixString), false);
}
281
// Length queries and full copies for data that ends with the two-byte sequence 0xc0 0x80.
TEST_F(StringTest, ForeignLengthAndCopyTest1b0)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0xc0, 0x80, 0x00};
    const uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // c080 is U+0000, so the pair yields one character and the terminator yields none.
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 2U);  // \0 doesn't counts for UTF16

    // Copying back to MUTF-8 must reproduce the input, terminator included.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copying to UTF-16 yields one unit per character.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x00};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
298
// Length queries and full copies for data made only of single-byte characters (up to 0x7f).
TEST_F(StringTest, ForeignLengthAndCopyTest1b)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0x7f, 0x00};
    const uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 1, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 1);  // \0 doesn't counts for UTF16

    // Copying back to MUTF-8 must reproduce the input, terminator included.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copying to UTF-16 yields one unit per input byte, without a terminator.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x7f};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
315
// Length queries and full copies for data containing one two-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest2b)
{
    std::vector<uint8_t> mutf8 {0xc2, 0xa7, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // \0 doesn't counts for UTF16

    // Copying back to MUTF-8 must reproduce the input, terminator included.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // 0xc2 0xa7 is expected to come back as the single unit 0xa7.
    std::vector<uint16_t> expected16 {0xa7, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
331
// Length queries and full copies for data containing one three-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest3b)
{
    std::vector<uint8_t> mutf8 {0xef, 0xbf, 0x83, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // \0 doesn't counts for UTF16

    // Copying back to MUTF-8 must reproduce the input, terminator included.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // 0xef 0xbf 0x83 is expected to come back as the single unit 0xffc3.
    std::vector<uint16_t> expected16 {0xffc3, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
347
// Length queries and full copies for a surrogate pair given as a six-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest6b)
{
    std::vector<uint8_t> mutf8 {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x20, 0x00};  // UTF-16 size is 3
    // We support 4-byte utf-8 sequences, so {0xd801, 0xdc37} is encoded to 4 bytes instead of 6
    std::vector<uint8_t> expected8 {0xf0, 0x90, 0x90, 0xb7, 0x20, 0x00};

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 3U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), expected8.size());
    ASSERT_EQ(created->GetUtf16Length(), 3U);  // \0 doesn't counts for UTF16

    // Only the copied bytes are checked here; the returned count is not asserted.
    std::vector<uint8_t> copied8(expected8.size());
    created->CopyDataMUtf8(copied8.data(), copied8.size(), true);
    ASSERT_EQ(copied8, expected8);

    std::vector<uint16_t> expected16 {0xd801, 0xdc37, 0x20};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
365
// Copies the two-character region "cd" out of "abcdz" as MUTF-8 and as UTF-16.
TEST_F(StringTest, RegionCopyTestMutf8)
{
    std::vector<uint8_t> source {'a', 'b', 'c', 'd', 'z', 0x00};
    const uint32_t charCount = source.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(source.data(), charCount, GetLanguageContext(), vm);
    const size_t regionStart = 2;

    // MUTF-8 region copy: the requested length is derived from the string's MUTF-8 length.
    const size_t mutf8Length = created->GetMUtf8Length();
    std::vector<uint8_t> expected8 = {'c', 'd', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    const auto copiedBytes =
        created->CopyDataRegionMUtf8(copied8.data(), regionStart, mutf8Length - regionStart - 1 - 1, copied8.size());
    ASSERT_EQ(copiedBytes, copied8.size() - 1);
    // Terminate the buffer explicitly before comparing it with the expected bytes.
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy of the same characters.
    const size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> expected16 = {'c', 'd'};
    std::vector<uint16_t> copied16(expected16.size());
    const auto copiedUnits =
        created->CopyDataRegionUtf16(copied16.data(), regionStart, utf16Length - regionStart - 1, copied16.size());
    ASSERT_EQ(copiedUnits, copied16.size());
    ASSERT_EQ(copied16, expected16);
}
385
// Copies the region "hello" out of "abhellocdz" as UTF-8 and as UTF-16.
TEST_F(StringTest, RegionCopyTestUtf8)
{
    std::vector<uint8_t> source {'a', 'b', 'h', 'e', 'l', 'l', 'o', 'c', 'd', 'z', 0};
    const size_t regionStart = 2;
    const size_t regionLength = 5;

    // The expected bytes end with a zero; the output buffer is zero-initialised to match.
    std::vector<uint8_t> expected8 {'h', 'e', 'l', 'l', 'o', 0};
    std::vector<uint8_t> copied8(expected8.size());

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromUtf8(source.data(), source.size(), GetLanguageContext(), vm);

    const auto copiedBytes =
        created->CopyDataRegionUtf8(copied8.data(), regionStart, regionLength, copied8.size() - 1);
    ASSERT_EQ(copiedBytes, expected8.size() - 1);
    ASSERT_EQ(copied8, expected8);

    std::vector<uint16_t> expected16 {'h', 'e', 'l', 'l', 'o'};
    std::vector<uint16_t> copied16(expected16.size());

    const auto copiedUnits = created->CopyDataRegionUtf16(copied16.data(), regionStart, regionLength, copied16.size());
    ASSERT_EQ(copiedUnits, expected16.size());
    ASSERT_EQ(copied16, expected16);
}
405
// Copies the region "cdz" out of a string whose data ends with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, RegionCopyTestUtf16)
{
    std::vector<uint8_t> source {'a', 'b', 'c', 'd', 'z', 0xc2, 0xa7, 0x00};
    // Exclude the terminator and the second byte of the two-byte sequence.
    const uint32_t charCount = source.size() - 1 - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(source.data(), charCount, GetLanguageContext(), vm);
    const size_t regionStart = 2;

    // MUTF-8 region copy of three characters.
    std::vector<uint8_t> expected8 = {'c', 'd', 'z', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    const auto copiedBytes = created->CopyDataRegionMUtf8(copied8.data(), regionStart, 3U, copied8.size());
    ASSERT_EQ(copiedBytes, copied8.size() - 1);
    // Terminate the buffer explicitly before comparing it with the expected bytes.
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy; the output buffer is sized from the string's UTF-16 length.
    const size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> copied16(utf16Length - regionStart - 1);
    std::vector<uint16_t> expected16 = {'c', 'd', 'z'};
    const auto copiedUnits = created->CopyDataRegionUtf16(copied16.data(), regionStart, 3U, copied16.size());
    ASSERT_EQ(copiedUnits, copied16.size());
    ASSERT_EQ(copied16, expected16);
}
424
// GetUtf8Length() of a String created via CreateFromUtf8 must equal the number of bytes passed in.
TEST_F(StringTest, GetUtf8Length)
{
    std::vector<uint8_t> bytes = {'H', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd', '!', 0};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromUtf8(bytes.data(), bytes.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetUtf8Length(), bytes.size());
}
432
// Bytes read back through GetDataUtf8() must rebuild a String equal to the original one.
TEST_F(StringTest, GetDataUtf8)
{
    std::vector<uint8_t> sample = {'e', 'x', 'a', 'm', 'p', 'l', 'e'};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *original = String::CreateFromUtf8(sample.data(), sample.size(), GetLanguageContext(), vm);
    ASSERT_FALSE(original->IsUtf16());

    // Snapshot the string's own storage into a separate buffer.
    auto *storedBytes = original->GetDataUtf8();
    std::vector<uint8_t> roundTrip(storedBytes, storedBytes + sample.size());  // NOLINT

    String *rebuilt = String::CreateFromUtf8(roundTrip.data(), roundTrip.size(), GetLanguageContext(), vm);
    ASSERT_FALSE(rebuilt->IsUtf16());
    ASSERT_TRUE(String::StringsAreEqual(original, rebuilt));
}
446
// Builds two random MUTF-8 byte strings of equal length and checks String equality across the
// different ways of creating a String from them.
//
// The byte buffers are std::vector instances rather than new[]/delete[] arrays: every ASSERT_*
// returns from the test on failure, which would skip a trailing delete[] and leak the arrays.
TEST_F(StringTest, SameLengthStringCompareTest)
{
    static constexpr uint32_t STRING_LENGTH = 10;
    // Value-initialised, so the element at index STRING_LENGTH is already the '\0' terminator.
    std::vector<char> fString(STRING_LENGTH + 1);
    std::vector<char> sString(STRING_LENGTH + 1);

    // Hack for ConvertMUtf8ToUtf16 call.
    // We should use char from 0x7f to 0x0 if we want to
    // generate one utf16 (0x00xx) from this mutf8.
    // Zero is bumped to one so that the data never contains an early terminator.
    auto nextRandomByte = []() -> uint8_t {
        // NOLINTNEXTLINE(cert-msc50-cpp)
        uint8_t val = rand();
        val = val >> 1U;
        if (val == 0) {
            val++;
        }
        return val;
    };

    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        // Draw order matters for reproducibility with a fixed seed: first string, then second.
        const uint8_t val1 = nextRandomByte();
        const uint8_t val2 = nextRandomByte();
        fString[i] = val1;
        sString[i] = val2;
    }
    // Set the last elements in strings with size more than 0x8 to disable compressing.
    // This will leads to count two MUtf-8 bytes as one UTF-16 so length = string_length - 1
    fString[STRING_LENGTH - 2U] = static_cast<char>(0x80);
    sString[STRING_LENGTH - 2U] = static_cast<char>(0x80);
    fString[STRING_LENGTH - 1] = static_cast<char>(0x01);
    sString[STRING_LENGTH - 1] = static_cast<char>(0x01);

    String *firstUtf16String =
        String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString.data()), STRING_LENGTH - 1,
                                GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    // Try to use function with automatic length detection
    String *secondUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString.data()),
                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(firstUtf16String->GetLength(), STRING_LENGTH - 1);
    ASSERT_EQ(secondUtf16String->GetLength(), STRING_LENGTH - 1);

    // Dirty hack to not create utf16 for our purpose, just reuse old one
    // Try to create compressed strings.
    String *firstUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
                                                      GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *secondUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
                                                       GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(firstUtf8String->GetLength(), STRING_LENGTH - 1);
    ASSERT_EQ(secondUtf8String->GetLength(), STRING_LENGTH - 1);

    // The two random strings are equal exactly when their raw bytes are equal.
    ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf16String),
              strcmp(fString.data(), sString.data()) == 0);
    ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf8String),
              firstUtf16String->IsUtf16() == secondUtf8String->IsUtf16());
    ASSERT_EQ(String::StringsAreEqual(firstUtf8String, secondUtf8String), true);
    ASSERT_TRUE(firstUtf16String->IsUtf16());
    ASSERT_TRUE(String::StringsAreEqualUtf16(firstUtf16String, firstUtf16String->GetDataUtf16(),
                                             firstUtf16String->GetLength()));
}
519
// ObjectSize() must agree with the static size computation for the same character count.
TEST_F(StringTest, ObjectSize)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // ASCII digits: compared against ComputeSizeMUtf8.
    {
        std::vector<uint8_t> digits {'1', '2', '3', '4', '5', 0x00};
        const uint32_t charCount = digits.size() - 1;
        String *created = String::CreateFromMUtf8(digits.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeMUtf8(charCount));
    }

    // Two byte pairs starting with 0x80: compared against ComputeSizeUtf16.
    {
        std::vector<uint8_t> pairs {0x80, 0x01, 0x80, 0x02, 0x00};
        const uint32_t charCount = pairs.size() / 2U;
        String *created = String::CreateFromMUtf8(pairs.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeUtf16(charCount));
    }
}
538
// At(i) must return the i-th source element for each of three ways of creating a String.
TEST_F(StringTest, AtTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> asciiBytes {'a', 'b', 'c', 'd', 'z', 0};
    String *current = String::CreateFromMUtf8(asciiBytes.data(), asciiBytes.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, current->IsUtf16());
    for (uint32_t pos = 0; pos + 1 < asciiBytes.size(); ++pos) {
        ASSERT_EQ(asciiBytes[pos], current->At(pos));
    }

    // utf16
    std::vector<uint16_t> wideUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    current = String::CreateFromUtf16(wideUnits.data(), wideUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(true, current->IsUtf16());
    for (uint32_t pos = 0; pos + 1 < wideUnits.size(); ++pos) {
        ASSERT_EQ(wideUnits[pos], current->At(pos));
    }

    // utf16 -> utf8
    std::vector<uint16_t> narrowUnits {'a', 'b', 121, 122, 'z', 0};
    current = String::CreateFromUtf16(narrowUnits.data(), narrowUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, current->IsUtf16());
    for (uint32_t pos = 0; pos + 1 < narrowUnits.size(); ++pos) {
        ASSERT_EQ(narrowUnits[pos], current->At(pos));
    }
}
568
// Searches for "bcd" in "abcdz" with every combination of MUTF-8-created and UTF-16-created
// strings. Besides checking that all combinations agree with each other, the results are
// compared with the absolute expected positions, so an IndexOf that returned the same wrong
// value for every combination would no longer pass.
TEST_F(StringTest, IndexOfTest)
{
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'b', 'c', 'd', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'b', 'c', 'd', 0};
    String *string1 = String::CreateFromMUtf8(data1.data(), data1.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string2 = String::CreateFromMUtf8(data2.data(), data2.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string3 = String::CreateFromUtf16(data3.data(), data3.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string4 = String::CreateFromUtf16(data4.data(), data4.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());

    // Search from position 1: the pattern starts exactly there.
    auto index = string1->IndexOf(string2, 1);
    auto index1 = string1->IndexOf(string4, 1);
    auto index2 = string3->IndexOf(string2, 1);
    auto index3 = string3->IndexOf(string4, 1);
    std::cout << index << std::endl;
    ASSERT_EQ(1, index);
    ASSERT_EQ(1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);

    // Search from position 2: the only occurrence lies before the start, so nothing is found.
    index = string1->IndexOf(string2, 2_I);
    index1 = string1->IndexOf(string4, 2_I);
    index2 = string3->IndexOf(string2, 2_I);
    index3 = string3->IndexOf(string4, 2_I);
    std::cout << index << std::endl;
    ASSERT_EQ(-1, index);
    ASSERT_EQ(-1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);
}
599
// Checks IndexOf() results for several text/pattern pairs and start positions, including
// negative and past-the-end starts and an empty pattern.
TEST_F(StringTest, IndexOfTest2)
{
    // Creates a String from zero-terminated bytes; the terminator is not part of the content.
    auto makeString = [this](std::vector<uint8_t> &bytes) -> String * {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(),
                                       Runtime::GetCurrent()->GetPandaVM());
    };

    // Pattern occurring at the beginning and at the end of the text.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'a', 'c', 'a', 'b', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'b', 'a', 0};
        String *text = makeString(textBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(0, text->IndexOf(needle, -1));
        ASSERT_EQ(0, text->IndexOf(needle, 0));
        ASSERT_EQ(4_I, text->IndexOf(needle, 1));
        ASSERT_EQ(4_I, text->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 5_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 6_I));

        // Empty string as the text and as the pattern.
        String *empty = String::CreateEmptyString(GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
        ASSERT_EQ(-1, empty->IndexOf(text, 0));
        ASSERT_EQ(0, text->IndexOf(empty, -3_I));
        ASSERT_EQ(2_I, text->IndexOf(empty, 2_I));
        ASSERT_EQ(7_I, text->IndexOf(empty, 10_I));
    }
    // Pattern occurring once, in the middle of the text.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0};
        std::vector<uint8_t> needleBytes {'d', 'e', 'f', 0};
        String *text = makeString(textBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(3_I, text->IndexOf(needle, 0));
    }
    // Overlapping occurrences of the pattern.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'a', 'a', 'a', 'a', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'a', 'a', 0};
        String *text = makeString(textBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(2_I, text->IndexOf(needle, 0));
        ASSERT_EQ(2_I, text->IndexOf(needle, 2_I));
        ASSERT_EQ(3_I, text->IndexOf(needle, 3_I));
        ASSERT_EQ(4_I, text->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 5_I));
    }
}
645
// Checks the sign of Compare() for strings that report IsUtf16() == false, then against a
// string that reports IsUtf16() == true, and finally for each string against itself.
TEST_F(StringTest, CompareTestUtf8)
{
    // Both helpers drop the trailing zero element of the source vector.
    auto fromMUtf8 = [this](std::vector<uint8_t> &bytes) -> String * {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(),
                                       Runtime::GetCurrent()->GetPandaVM());
    };
    auto fromUtf16 = [this](std::vector<uint16_t> &units) -> String * {
        return String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(),
                                       Runtime::GetCurrent()->GetPandaVM());
    };

    // utf8
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'a', 'b', 'c', 'd', 'z', 'x', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'a', 'b', 'd', 'c', 'z', 0};
    String *abcdz = fromMUtf8(data1);
    String *abcdzx = fromMUtf8(data2);
    String *abcdzFromUtf16 = fromUtf16(data3);
    String *abdczFromUtf16 = fromUtf16(data4);
    ASSERT_EQ(false, abcdz->IsUtf16());
    ASSERT_EQ(false, abcdzx->IsUtf16());
    ASSERT_EQ(false, abcdzFromUtf16->IsUtf16());
    ASSERT_EQ(false, abdczFromUtf16->IsUtf16());
    ASSERT_LT(abcdz->Compare(abcdzx), 0);
    ASSERT_GT(abcdzx->Compare(abcdz), 0);
    ASSERT_EQ(abcdz->Compare(abcdzFromUtf16), 0);
    ASSERT_EQ(abcdzFromUtf16->Compare(abcdz), 0);
    ASSERT_LT(abcdzx->Compare(abdczFromUtf16), 0);
    ASSERT_GT(abdczFromUtf16->Compare(abcdzx), 0);

    // utf8 vs utf16
    std::vector<uint16_t> data5 {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wide = fromUtf16(data5);
    ASSERT_EQ(true, wide->IsUtf16());
    ASSERT_LT(abcdzx->Compare(wide), 0);
    ASSERT_GT(wide->Compare(abcdzx), 0);
    ASSERT_LT(abdczFromUtf16->Compare(wide), 0);
    ASSERT_GT(wide->Compare(abdczFromUtf16), 0);

    // compare with self
    ASSERT_EQ(abcdz->Compare(abcdz), 0);
    ASSERT_EQ(abcdzx->Compare(abcdzx), 0);
    ASSERT_EQ(abcdzFromUtf16->Compare(abcdzFromUtf16), 0);
    ASSERT_EQ(abdczFromUtf16->Compare(abdczFromUtf16), 0);
    ASSERT_EQ(wide->Compare(wide), 0);
}
689
// Checks the sign of Compare() between strings that all report IsUtf16() == true.
TEST_F(StringTest, CompareTestUtf16)
{
    // Drops the trailing zero unit of the source vector.
    auto fromUtf16 = [this](std::vector<uint16_t> &units) -> String * {
        return String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(),
                                       Runtime::GetCurrent()->GetPandaVM());
    };

    std::vector<uint16_t> data5 {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *lower = fromUtf16(data5);
    std::vector<uint16_t> data6 {'a', 0xab, 0xab, 0};
    // Two distinct String objects created from the same units.
    String *higher = fromUtf16(data6);
    String *higherTwin = fromUtf16(data6);
    ASSERT_EQ(true, lower->IsUtf16());
    ASSERT_EQ(true, higher->IsUtf16());
    ASSERT_EQ(true, higherTwin->IsUtf16());

    ASSERT_LT(lower->Compare(higher), 0);
    ASSERT_GT(higher->Compare(lower), 0);
    ASSERT_EQ(higher->Compare(higherTwin), 0);
    ASSERT_EQ(higherTwin->Compare(higher), 0);

    // compare with self
    ASSERT_EQ(lower->Compare(lower), 0);
    ASSERT_EQ(higher->Compare(higher), 0);
    ASSERT_EQ(higherTwin->Compare(higherTwin), 0);
}
714
// Checks String::Compare on longer strings created from MUTF-8 data that share a 16-character
// prefix of 'a' and differ (or not) after it.
TEST_F(StringTest, CompareTestLongUtf8)
{
    static constexpr size_t RUN_LENGTH = 16U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Builds a string from a zero-terminated byte vector; the terminator is not part of the string.
    auto makeMUtf8 = [this, vm](const std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(), vm);
    };

    // 16 x 'a'
    std::vector<uint8_t> prefixOnly(RUN_LENGTH, 'a');
    prefixOnly.push_back(0);

    // 16 x 'a' + "xz"
    std::vector<uint8_t> prefixXz(RUN_LENGTH, 'a');
    prefixXz.insert(prefixXz.end(), {'x', 'z'});
    prefixXz.push_back(0);

    // 16 x 'a' + "xxxyyaa" + 16 x 'a'
    std::vector<uint8_t> middleLow(RUN_LENGTH, 'a');
    middleLow.insert(middleLow.end(), {'x', 'x', 'x', 'y', 'y', 'a', 'a'});
    middleLow.insert(middleLow.end(), RUN_LENGTH, 'a');
    middleLow.push_back(0);

    // 16 x 'a' + "xxxyyyy" + 16 x 'a'
    std::vector<uint8_t> middleHigh(RUN_LENGTH, 'a');
    middleHigh.insert(middleHigh.end(), {'x', 'x', 'x', 'y', 'y', 'y', 'y'});
    middleHigh.insert(middleHigh.end(), RUN_LENGTH, 'a');
    middleHigh.push_back(0);

    String *strPrefix = makeMUtf8(prefixOnly);
    String *strXz = makeMUtf8(prefixXz);
    String *strLow = makeMUtf8(middleLow);
    String *strHigh = makeMUtf8(middleHigh);
    String *strPrefixTwin = makeMUtf8(prefixOnly);
    String *strXzTwin = makeMUtf8(prefixXz);

    // Strings built from identical data compare equal in both directions.
    ASSERT_EQ(strPrefix->Compare(strPrefixTwin), 0);
    ASSERT_EQ(strPrefixTwin->Compare(strPrefix), 0);
    ASSERT_EQ(strXz->Compare(strXzTwin), 0);
    ASSERT_EQ(strXzTwin->Compare(strXz), 0);
    // Same length, first difference deep inside the string ('a' vs 'y').
    ASSERT_LT(strLow->Compare(strHigh), 0);
    ASSERT_GT(strHigh->Compare(strLow), 0);
    // Different lengths, first difference right after the common prefix ('x' vs 'z').
    ASSERT_LT(strLow->Compare(strXz), 0);
    ASSERT_GT(strXz->Compare(strLow), 0);
}
762
// Checks String::Compare on longer strings created from UTF-16 data that share a 16-unit
// prefix of 0xab and differ (or not) after it.
TEST_F(StringTest, CompareTestLongUtf16)
{
    static constexpr size_t RUN_LENGTH = 16U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Builds a string from a zero-terminated code-unit vector; the terminator is not part of the string.
    auto makeUtf16 = [this, vm](const std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    };

    // 16 x 0xab
    std::vector<uint16_t> prefixOnly(RUN_LENGTH, 0xab);
    prefixOnly.push_back(0);

    // 16 x 0xab + {'a', 0xbb}
    std::vector<uint16_t> prefixShortTail(RUN_LENGTH, 0xab);
    prefixShortTail.insert(prefixShortTail.end(), {'a', 0xbb});
    prefixShortTail.push_back(0);

    // 16 x 0xab + {'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd} + 16 x 0xab
    std::vector<uint16_t> middleLow(RUN_LENGTH, 0xab);
    middleLow.insert(middleLow.end(), {'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd});
    middleLow.insert(middleLow.end(), RUN_LENGTH, 0xab);
    middleLow.push_back(0);

    // 16 x 0xab + {'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd} + 16 x 0xab
    std::vector<uint16_t> middleHigh(RUN_LENGTH, 0xab);
    middleHigh.insert(middleHigh.end(), {'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd});
    middleHigh.insert(middleHigh.end(), RUN_LENGTH, 0xab);
    middleHigh.push_back(0);

    String *strPrefix = makeUtf16(prefixOnly);
    String *strShortTail = makeUtf16(prefixShortTail);
    String *strLow = makeUtf16(middleLow);
    String *strHigh = makeUtf16(middleHigh);
    String *strPrefixTwin = makeUtf16(prefixOnly);
    String *strShortTailTwin = makeUtf16(prefixShortTail);

    // Strings built from identical data compare equal in both directions.
    ASSERT_EQ(strPrefix->Compare(strPrefixTwin), 0);
    ASSERT_EQ(strPrefixTwin->Compare(strPrefix), 0);
    ASSERT_EQ(strShortTail->Compare(strShortTailTwin), 0);
    ASSERT_EQ(strShortTailTwin->Compare(strShortTail), 0);
    // Same length, first difference inside the middle section (0xcc vs 0xdd).
    ASSERT_LT(strLow->Compare(strHigh), 0);
    ASSERT_GT(strHigh->Compare(strLow), 0);
    // Different lengths, first difference two units after the common prefix ('a' vs 0xbb).
    ASSERT_LT(strLow->Compare(strShortTail), 0);
    ASSERT_GT(strShortTail->Compare(strLow), 0);
}
810
// Checks String::Concat for the three combinations exercised here (utf8 + utf8, utf8 + utf16,
// utf16 + utf16) by comparing the result with a string created directly from the joined data.
TEST_F(StringTest, ConcatTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Factories take zero-terminated vectors; the terminator is not part of the created string.
    auto makeMUtf8 = [this, vm](const std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(), vm);
    };
    auto makeUtf16 = [this, vm](const std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    };
    auto concat = [this, vm](String *lhs, String *rhs) {
        return String::Concat(lhs, rhs, GetLanguageContext(), vm);
    };

    // utf8 + utf8
    const std::vector<uint8_t> headBytes {'f', 'g', 'h', 0};
    const std::vector<uint8_t> tailBytes {'a', 'b', 'c', 'd', 'e', 0};
    // headBytes without its terminator, followed by tailBytes including its terminator.
    std::vector<uint8_t> joinedBytes(headBytes.begin(), headBytes.end() - 1);
    joinedBytes.insert(joinedBytes.end(), tailBytes.begin(), tailBytes.end());

    String *head = makeMUtf8(headBytes);
    String *tail = makeMUtf8(tailBytes);
    String *expectedUtf8 = makeMUtf8(joinedBytes);
    ASSERT_FALSE(head->IsUtf16());
    ASSERT_FALSE(tail->IsUtf16());
    String *actualUtf8 = concat(head, tail);
    ASSERT_EQ(expectedUtf8->Compare(actualUtf8), 0);
    ASSERT_EQ(actualUtf8->Compare(expectedUtf8), 0);

    // utf8 + utf16
    const std::vector<uint16_t> wideUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    const std::vector<uint16_t> mixedUnits {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};  // headBytes + wideUnits
    String *wide = makeUtf16(wideUnits);
    String *expectedMixed = makeUtf16(mixedUnits);
    String *actualMixed = concat(head, wide);
    ASSERT_EQ(expectedMixed->GetLength(), actualMixed->GetLength());
    ASSERT_EQ(expectedMixed->Compare(actualMixed), 0);
    ASSERT_EQ(actualMixed->Compare(expectedMixed), 0);

    // utf16 + utf16
    // wideUnits without its terminator, followed by mixedUnits including its terminator.
    std::vector<uint16_t> joinedUnits(wideUnits.begin(), wideUnits.end() - 1);
    joinedUnits.insert(joinedUnits.end(), mixedUnits.begin(), mixedUnits.end());
    String *expectedWide = makeUtf16(joinedUnits);
    String *actualWide = concat(wide, expectedMixed);
    ASSERT_EQ(expectedWide->Compare(actualWide), 0);
    ASSERT_EQ(actualWide->Compare(expectedWide), 0);
}
854
// Checks String::DoReplace: replacing 'Z' with 'A' in "ZBCDEFGHIJ" must yield "ABCDEFGHIJ".
TEST_F(StringTest, DoReplaceTest0)
{
    static constexpr uint32_t STRING_LENGTH = 10;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // The buffers are owned by vectors, so they are released even when a fatal assertion
    // leaves the test body early (the previous new[]/delete[] pair leaked in that case).
    // The vectors are value-initialised, so the extra last element is the terminating zero
    // that the length-less CreateFromMUtf8 overload relies on.
    std::vector<uint8_t> sourceBytes(STRING_LENGTH + 1);
    std::vector<uint8_t> expectedBytes(STRING_LENGTH + 1);
    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        const auto letter = static_cast<uint8_t>('A' + i);
        sourceBytes[i] = letter;
        expectedBytes[i] = letter;
    }
    // Only the first character differs between the source and the expected string.
    sourceBytes[0] = 'Z';

    String *fStringS = String::CreateFromMUtf8(sourceBytes.data(), GetLanguageContext(), vm);
    String *sStringS = String::CreateFromMUtf8(expectedBytes.data(), GetLanguageContext(), vm);
    String *tStringS = String::DoReplace(fStringS, 'Z', 'A', GetLanguageContext(), vm);
    ASSERT_EQ(String::StringsAreEqual(tStringS, sStringS), true);
}
884
// Checks String::FastSubString: the 5-character substring starting at index 1 of "ABCDEFGHIJ"
// must equal a string created directly from those characters.
TEST_F(StringTest, FastSubstringTest0)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // The buffers are owned by vectors, so they are released even when a fatal assertion
    // leaves the test body early (the previous new[]/delete[] pair leaked in that case).
    uint32_t stringLength = 10;
    // Value-initialised, so the extra last element is the terminating zero.
    std::vector<uint8_t> sourceBytes(stringLength + 1);
    for (uint32_t i = 0; i < stringLength; i++) {
        sourceBytes[i] = static_cast<uint8_t>('A' + i);
    }

    uint32_t subStringLength = 5;
    uint32_t subStringStart = 1;
    // Expected data: subStringLength bytes of the source starting at subStringStart, zero-terminated.
    std::vector<uint8_t> expectedBytes(sourceBytes.begin() + subStringStart,
                                       sourceBytes.begin() + subStringStart + subStringLength);
    expectedBytes.push_back(0);

    String *fStringS = String::CreateFromMUtf8(sourceBytes.data(), GetLanguageContext(), vm);
    String *sStringS = String::CreateFromMUtf8(expectedBytes.data(), GetLanguageContext(), vm);
    String *tStringS = String::FastSubString(fStringS, subStringStart, subStringLength, GetLanguageContext(), vm);
    ASSERT_EQ(String::StringsAreEqual(tStringS, sStringS), true);
}
917
// Checks String::ToCharArray for a string created from MUTF-8 data and for one created from
// UTF-16 data: the resulting array must hold the same code units as the source data.
TEST_F(StringTest, ToCharArray)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> data {'a', 'b', 'c', 'd', 'e', 0};
    String *utf8String = String::CreateFromMUtf8(data.data(), data.size() - 1, GetLanguageContext(), vm);
    Array *newArray = utf8String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray, nullptr);
    // The loop below is bounded by the array length, so without this check an empty or
    // truncated array would pass the test without a single element being compared.
    ASSERT_EQ(newArray->GetLength(), data.size() - 1);
    for (uint32_t i = 0; i < newArray->GetLength(); ++i) {
        ASSERT_EQ(data[i], newArray->Get<uint16_t>(i));
    }

    // utf16
    std::vector<uint16_t> data1 {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *utf16String = String::CreateFromUtf16(data1.data(), data1.size() - 1, GetLanguageContext(), vm);
    Array *newArray1 = utf16String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray1, nullptr);
    // Same guard against a vacuous pass as above.
    ASSERT_EQ(newArray1->GetLength(), data1.size() - 1);
    for (uint32_t i = 0; i < newArray1->GetLength(); ++i) {
        ASSERT_EQ(data1[i], newArray1->Get<uint16_t>(i));
    }
}
937
// Checks String::CreateNewStringFromChars: a string made from a slice of a char array must equal
// a string created directly from the same slice of the original UTF-16 data.
TEST_F(StringTest, CreateNewStingFromCharArray)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const std::vector<uint16_t> units {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *sourceString = String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    Array *charArray = sourceString->ToCharArray(GetLanguageContext());

    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    // Expected data: sliceLength code units starting at sliceOffset, followed by a zero terminator.
    std::vector<uint16_t> sliceUnits(units.begin() + sliceOffset, units.begin() + sliceOffset + sliceLength);
    sliceUnits.push_back(0);
    String *expected = String::CreateFromUtf16(sliceUnits.data(), sliceUnits.size() - 1, GetLanguageContext(), vm);

    String *result = String::CreateNewStringFromChars(sliceOffset, sliceLength, charArray, GetLanguageContext(), vm);

    ASSERT_TRUE(String::StringsAreEqual(result, expected));
}
960
// Checks String::CreateNewStringFromBytes: a string made from a slice of a byte array (with a
// zero high byte) must equal a UTF-16 string whose code units are those bytes widened to 16 bits.
TEST_F(StringTest, CreateNewStingFromByteArray)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const std::vector<uint8_t> bytes {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    const uint32_t highByte = 0;

    // Expected code units: highByte in the upper 8 bits, the source byte in the lower 8 bits.
    // No zero terminator is appended because the length is passed explicitly below.
    std::vector<uint16_t> expectedUnits(sliceLength);
    for (uint32_t idx = 0; idx < sliceLength; ++idx) {
        expectedUnits[idx] = (highByte << 8U) + (bytes[idx + sliceOffset] & 0xFFU);
    }
    String *expected = String::CreateFromUtf16(expectedUnits.data(), sliceLength, GetLanguageContext(), vm);

    // Copy every byte except the trailing zero into an array of the ARRAY_I8 class root.
    Class *byteArrayClass =
        Runtime::GetCurrent()->GetClassLinker()->GetExtension(GetLanguageContext())->GetClassRoot(
            ark::ClassRoot::ARRAY_I8);
    const auto payloadSize = bytes.size() - 1;
    Array *byteArray = Array::Create(byteArrayClass, payloadSize);
    for (uint32_t idx = 0; idx < payloadSize; idx++) {
        byteArray->Set<uint8_t>(idx, bytes[idx]);
    }

    String *result =
        String::CreateNewStringFromBytes(sliceOffset, sliceLength, highByte, byteArray, GetLanguageContext(), vm);

    ASSERT_TRUE(String::StringsAreEqual(result, expected));
}
989
990 } // namespace ark::coretypes::test
991
992 // NOLINTEND(readability-magic-numbers)
993