1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <ctime>
17
18 #include "gtest/gtest.h"
19 #include "libpandabase/utils/span.h"
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/utils.h"
22 #include "runtime/include/class_linker_extension.h"
23 #include "runtime/include/coretypes/array-inl.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/include/thread.h"
27
28 // NOLINTBEGIN(readability-magic-numbers)
29
30 namespace ark::coretypes::test {
31
32 class StringTest : public testing::Test {
33 public:
StringTest()34 StringTest()
35 {
36 #ifdef PANDA_NIGHTLY_TEST_ON
37 seed_ = std::time(NULL);
38 #else
39 seed_ = 0xDEADBEEF;
40 #endif
41 srand(seed_);
42 // We need to create a runtime instance to be able to create strings.
43 options_.SetShouldLoadBootPandaFiles(false);
44 options_.SetShouldInitializeIntrinsics(false);
45 Runtime::Create(options_);
46 }
47
~StringTest()48 ~StringTest() override
49 {
50 Runtime::Destroy();
51 }
52
53 NO_COPY_SEMANTIC(StringTest);
54 NO_MOVE_SEMANTIC(StringTest);
55
GetLanguageContext()56 LanguageContext GetLanguageContext()
57 {
58 return Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
59 }
60
SetUp()61 void SetUp() override
62 {
63 thread_ = ark::MTManagedThread::GetCurrent();
64 thread_->ManagedCodeBegin();
65 }
66
TearDown()67 void TearDown() override
68 {
69 thread_->ManagedCodeEnd();
70 }
71
72 protected:
73 static constexpr uint32_t SIMPLE_UTF8_STRING_LENGTH = 13;
74 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
75 static constexpr char SIMPLE_UTF8_STRING[SIMPLE_UTF8_STRING_LENGTH + 1] = "Hello, world!";
76
77 private:
78 ark::MTManagedThread *thread_ {};
79 unsigned seed_ {};
80 RuntimeOptions options_;
81 };
82
// A string created from single-byte MUTF-8 data must be reported equal to the same raw bytes.
TEST_F(StringTest, EqualStringWithCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0x01, 0x05, 0x07, 0x00};
    // The trailing zero byte is not counted.
    uint32_t charCount = rawBytes.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
91
// A string whose data starts with the two-byte sequence 0xc2 0xa7 must be reported equal to the same raw bytes.
TEST_F(StringTest, EqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    // Append 20 consecutive single-byte characters starting at 0x30, then the terminator.
    for (size_t offset = 0; offset < 20U; offset++) {
        rawBytes.push_back(0x30 + offset);
    }
    rawBytes.push_back(0);

    // Two bytes are not counted: the terminator and the second byte of the two-byte sequence.
    uint32_t charCount = rawBytes.size() - 2U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
106
// Raw data that differs from the string in its last character must not be reported equal.
TEST_F(StringTest, NotEqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> sourceBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> otherBytes {0xc2, 0xa7, 0x34, 0x00};
    uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(sourceBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, otherBytes.data(), charCount));
}
116
// A string built from data containing a two-byte sequence must not equal raw single-byte data of another length.
TEST_F(StringTest, NotEqualStringNotCompressedStringWithCompressedRawData)
{
    std::vector<uint8_t> sourceBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> otherBytes {0x02, 0x07, 0x04, 0x00};
    uint32_t sourceCharCount = 2;
    uint32_t otherCharCount = 3;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(sourceBytes.data(), sourceCharCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, otherBytes.data(), otherCharCount));
}
127
// A string built from single-byte data must not equal raw data that contains a two-byte sequence.
TEST_F(StringTest, NotEqualCompressedStringWithUncompressedRawUtf8Data)
{
    std::vector<uint8_t> sourceBytes {0x02, 0x07, 0x04, 0x00};
    std::vector<uint8_t> otherBytes {0xc2, 0xa7, 0x33, 0x00};
    uint32_t sourceCharCount = 3;
    uint32_t otherCharCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(sourceBytes.data(), sourceCharCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, otherBytes.data(), otherCharCount));
}
138
// Raw data that is a shorter prefix of the string must not be reported equal.
TEST_F(StringTest, EqualStringWithMUtf8DifferentLength)
{
    std::vector<uint8_t> fullBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> prefixBytes {0xc2, 0xa7, 0x00};
    uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(fullBytes.data(), charCount, GetLanguageContext(), vm);
    // The prefix is one character shorter than the created string.
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, prefixBytes.data(), charCount - 1));
}
148
// A string created from UTF-16 code units must be reported equal to the same raw code units.
TEST_F(StringTest, EqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    const uint16_t *rawUnits = codeUnits.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, codeUnits.size()));
}
157
// A string created from UTF-16 code units 1..30 must be reported equal to the same raw code units.
TEST_F(StringTest, CompareCompressedStringWithRawUtf16)
{
    std::vector<uint16_t> codeUnits;
    for (size_t idx = 0; idx < 30U; idx++) {
        codeUnits.push_back(idx + 1);
    }
    codeUnits.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero code unit is excluded from the length.
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size() - 1, GetLanguageContext(), vm);
    const uint16_t *rawUnits = codeUnits.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, codeUnits.size() - 1));
}
172
// Raw UTF-16 data that is longer than the string must not be reported equal.
TEST_F(StringTest, EqualStringWithRawUtf16DifferentLength)
{
    std::vector<uint16_t> shortUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> longUnits {0xffc3, 0x33, 0x55, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(shortUnits.data(), shortUnits.size(), GetLanguageContext(), vm);
    const uint16_t *rawUnits = longUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUnits, longUnits.size()));
}
182
// Raw UTF-16 data of the same length that differs in one code unit must not be reported equal.
TEST_F(StringTest, NotEqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> sourceUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> otherUnits {0xffc3, 0x34, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(sourceUnits.data(), sourceUnits.size(), GetLanguageContext(), vm);

    const uint16_t *rawUnits = otherUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUnits, otherUnits.size()));
}
193
// The hash code stored for a string must match the hash computed directly from its MUTF-8 source bytes.
TEST_F(StringTest, compressedHashCodeUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);

    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeMutf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH);

    ASSERT_EQ(hashFromString, hashFromRaw);
}
// Same hash check as above, for data that starts with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, notCompressedHashCodeUtf8)
{
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < 20U; offset++) {
        rawBytes.push_back(0x30 + offset);
    }
    // The two-byte sequence counts as one character, each appended byte as one more (1 + 20).
    size_t charCount = rawBytes.size() - 1;
    rawBytes.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeMutf8(rawBytes.data(), charCount);

    ASSERT_EQ(hashFromString, hashFromRaw);
}
223
// The hash code stored for a string must match the hash computed directly from its UTF-16 source data.
TEST_F(StringTest, compressedHashCodeUtf16)
{
    std::vector<uint16_t> codeUnits;
    size_t unitCount = 30;
    for (size_t idx = 0; idx < unitCount; idx++) {
        codeUnits.push_back(idx + 1);
    }
    codeUnits.push_back(0);

    // Both calls below receive the full vector size, i.e. the trailing zero code unit is included.
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeUtf16(codeUnits.data(), codeUnits.size());
    ASSERT_EQ(hashFromString, hashFromRaw);
}
240
// Same hash check for UTF-16 data that contains the code unit 0xffc3.
TEST_F(StringTest, notCompressedHashCodeUtf16)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeUtf16(codeUnits.data(), codeUnits.size());
    ASSERT_EQ(hashFromString, hashFromRaw);
}
250
// GetLength() must return the length that was passed when creating the string from MUTF-8 data.
TEST_F(StringTest, lengthUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), SIMPLE_UTF8_STRING_LENGTH);
}
258
// GetLength() must return the number of code units that was passed when creating the string from UTF-16 data.
TEST_F(StringTest, lengthUtf16)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), codeUnits.size());
}
266
// A string and its shorter prefix must not be reported equal by StringsAreEqual().
TEST_F(StringTest, DifferentLengthStringCompareTest)
{
    static constexpr uint32_t PREFIX_LENGTH = 8;
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr char PREFIX[PREFIX_LENGTH + 1] = "Hello, w";

    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *fullString = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING),
                                                 SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(fullString->GetLength(), SIMPLE_UTF8_STRING_LENGTH);

    String *prefixString =
        String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(PREFIX), PREFIX_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(prefixString->GetLength(), PREFIX_LENGTH);

    ASSERT_FALSE(String::StringsAreEqual(fullString, prefixString));
}
281
// Length queries and copy-out for data that ends with the two-byte sequence 0xc0 0x80 (U+0000).
TEST_F(StringTest, ForeignLengthAndCopyTest1b0)
{
    std::vector<uint8_t> mutf8Bytes {'a', 'b', 'c', 'd', 'z', 0xc0, 0x80, 0x00};
    uint32_t byteCount = mutf8Bytes.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // 0xc0 0x80 is U+0000, so the character count is two less than the byte count.
    String *created = String::CreateFromMUtf8(mutf8Bytes.data(), byteCount - 2U, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetMUtf8Length(), mutf8Bytes.size());
    // The terminating \0 does not count for UTF-16.
    ASSERT_EQ(created->GetUtf16Length(), mutf8Bytes.size() - 2U);

    std::vector<uint8_t> copied8(mutf8Bytes.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8Bytes.size());
    ASSERT_EQ(copied8, mutf8Bytes);

    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x00};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
298
// Length queries and copy-out for data made only of single-byte characters (up to 0x7f).
TEST_F(StringTest, ForeignLengthAndCopyTest1b)
{
    std::vector<uint8_t> mutf8Bytes {'a', 'b', 'c', 'd', 'z', 0x7f, 0x00};
    uint32_t byteCount = mutf8Bytes.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8Bytes.data(), byteCount - 1, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetMUtf8Length(), mutf8Bytes.size());
    // The terminating \0 does not count for UTF-16.
    ASSERT_EQ(created->GetUtf16Length(), mutf8Bytes.size() - 1);

    std::vector<uint8_t> copied8(mutf8Bytes.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8Bytes.size());
    ASSERT_EQ(copied8, mutf8Bytes);

    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x7f};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
315
// Length queries and copy-out for data that contains a two-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest2b)
{
    // Four bytes including the terminator; the UTF-16 size is 2.
    std::vector<uint8_t> mutf8Bytes {0xc2, 0xa7, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8Bytes.data(), 2U, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetMUtf8Length(), mutf8Bytes.size());
    // The terminating \0 does not count for UTF-16.
    ASSERT_EQ(created->GetUtf16Length(), 2U);

    std::vector<uint8_t> copied8(mutf8Bytes.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8Bytes.size());
    ASSERT_EQ(copied8, mutf8Bytes);

    std::vector<uint16_t> expected16 {0xa7, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
331
// Length queries and copy-out for data that contains a three-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest3b)
{
    // Five bytes including the terminator; the UTF-16 size is 2.
    std::vector<uint8_t> mutf8Bytes {0xef, 0xbf, 0x83, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8Bytes.data(), 2U, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetMUtf8Length(), mutf8Bytes.size());
    // The terminating \0 does not count for UTF-16.
    ASSERT_EQ(created->GetUtf16Length(), 2U);

    std::vector<uint8_t> copied8(mutf8Bytes.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8Bytes.size());
    ASSERT_EQ(copied8, mutf8Bytes);

    std::vector<uint16_t> expected16 {0xffc3, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
347
// Length queries and copy-out for a surrogate pair supplied as two three-byte MUTF-8 sequences.
TEST_F(StringTest, ForeignLengthAndCopyTest6b)
{
    // Six-byte encoded surrogate pair followed by a space and the terminator; the UTF-16 size is 3.
    std::vector<uint8_t> inputBytes {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x20, 0x00};
    // We support 4-byte utf-8 sequences, so {0xd801, 0xdc37} is encoded to 4 bytes instead of 6
    std::vector<uint8_t> expected8 {0xf0, 0x90, 0x90, 0xb7, 0x20, 0x00};

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(inputBytes.data(), 3U, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetMUtf8Length(), expected8.size());
    // The terminating \0 does not count for UTF-16.
    ASSERT_EQ(created->GetUtf16Length(), 3U);

    std::vector<uint8_t> copied8(expected8.size());
    created->CopyDataMUtf8(copied8.data(), copied8.size(), true);
    ASSERT_EQ(copied8, expected8);

    std::vector<uint16_t> expected16 {0xd801, 0xdc37, 0x20};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
365
// Copies the region "cd" out of "abcdz" through both the MUTF-8 and the UTF-16 region-copy functions.
TEST_F(StringTest, RegionCopyTestMutf8)
{
    std::vector<uint8_t> sourceBytes {'a', 'b', 'c', 'd', 'z', 0x00};
    uint32_t charCount = sourceBytes.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(sourceBytes.data(), charCount, GetLanguageContext(), vm);

    size_t regionStart = 2;

    // MUTF-8 region copy; the terminator is written by the test itself after the copy.
    size_t mutf8Length = created->GetMUtf8Length();
    std::vector<uint8_t> expected8 = {'c', 'd', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, mutf8Length - regionStart - 1 - 1,
                                           copied8.size()),
              copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy.
    size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> expected16 = {'c', 'd'};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, utf16Length - regionStart - 1,
                                           copied16.size()),
              copied16.size());
    ASSERT_EQ(copied16, expected16);
}
385
// Copies the region "cdz" out of a string whose source data also contains the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, RegionCopyTestUtf16)
{
    std::vector<uint8_t> sourceBytes {'a', 'b', 'c', 'd', 'z', 0xc2, 0xa7, 0x00};
    // Not counted: the terminator and the second byte of the two-byte sequence.
    uint32_t charCount = sourceBytes.size() - 1 - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(sourceBytes.data(), charCount, GetLanguageContext(), vm);

    size_t regionStart = 2;

    // MUTF-8 region copy; the terminator is written by the test itself after the copy.
    std::vector<uint8_t> expected8 = {'c', 'd', 'z', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, 3U, copied8.size()), copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy.
    size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> copied16(utf16Length - regionStart - 1);
    std::vector<uint16_t> expected16 = {'c', 'd', 'z'};
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, 3U, copied16.size()), copied16.size());
    ASSERT_EQ(copied16, expected16);
}
404
// Builds two pseudo-random MUTF-8 buffers of the same length, creates strings from them and checks that
// StringsAreEqual() / StringsAreEqualUtf16() agree with the expected results.
TEST_F(StringTest, SameLengthStringCompareTest)
{
    static constexpr uint32_t STRING_LENGTH = 10;
    // The buffers are owned by std::vector so that they are released even when an ASSERT_* macro returns from
    // the test body early (the previous new[]/delete[] pair leaked in that case).
    std::vector<char> fString(STRING_LENGTH + 1);
    std::vector<char> sString(STRING_LENGTH + 1);

    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        // Hack for ConvertMUtf8ToUtf16 call.
        // We should use char from 0x7f to 0x0 if we want to
        // generate one utf16 (0x00xx) from this mutf8.
        // NOLINTNEXTLINE(cert-msc50-cpp)
        uint8_t val1 = rand();
        val1 = val1 >> 1U;
        if (val1 == 0) {
            val1++;
        }

        // NOLINTNEXTLINE(cert-msc50-cpp)
        uint8_t val2 = rand();
        val2 = val2 >> 1U;
        if (val2 == 0) {
            val2++;
        }

        fString[i] = val1;
        sString[i] = val2;
    }
    // Set the last elements in strings with size more than 0x8 to disable compressing.
    // This leads to counting two MUtf-8 bytes as one UTF-16 so length = string_length - 1
    fString[STRING_LENGTH - 2U] = uint8_t(0x80);
    sString[STRING_LENGTH - 2U] = uint8_t(0x80);
    fString[STRING_LENGTH - 1] = uint8_t(0x01);
    sString[STRING_LENGTH - 1] = uint8_t(0x01);
    fString[STRING_LENGTH] = '\0';
    sString[STRING_LENGTH] = '\0';

    String *firstUtf16String =
        String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString.data()), STRING_LENGTH - 1,
                                GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    // Try to use function with automatic length detection
    String *secondUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString.data()),
                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(firstUtf16String->GetLength(), STRING_LENGTH - 1);
    ASSERT_EQ(secondUtf16String->GetLength(), STRING_LENGTH - 1);

    // Dirty hack to not create utf16 for our purpose, just reuse old one
    // Try to create compressed strings.
    String *firstUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
                                                      GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *secondUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
                                                       GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(firstUtf8String->GetLength(), STRING_LENGTH - 1);
    ASSERT_EQ(secondUtf8String->GetLength(), STRING_LENGTH - 1);

    ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf16String),
              strcmp(fString.data(), sString.data()) == 0);
    ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf8String),
              firstUtf16String->IsUtf16() == secondUtf8String->IsUtf16());
    ASSERT_EQ(String::StringsAreEqual(firstUtf8String, secondUtf8String), true);
    ASSERT_TRUE(firstUtf16String->IsUtf16());
    ASSERT_TRUE(String::StringsAreEqualUtf16(firstUtf16String, firstUtf16String->GetDataUtf16(),
                                             firstUtf16String->GetLength()));
}
477
// ObjectSize() must match ComputeSizeMUtf8() / ComputeSizeUtf16() for the two kinds of input data below.
TEST_F(StringTest, ObjectSize)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Single-byte characters only: expected size comes from ComputeSizeMUtf8().
    {
        std::vector<uint8_t> asciiBytes {'1', '2', '3', '4', '5', 0x00};
        uint32_t charCount = asciiBytes.size() - 1;
        String *created = String::CreateFromMUtf8(asciiBytes.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeMUtf8(charCount));
    }

    // Data containing 0x80 bytes: expected size comes from ComputeSizeUtf16().
    {
        std::vector<uint8_t> wideBytes {0x80, 0x01, 0x80, 0x02, 0x00};
        uint32_t charCount = wideBytes.size() / 2U;
        String *created = String::CreateFromMUtf8(wideBytes.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeUtf16(charCount));
    }
}
496
// At(i) must return the i-th source element for each of the three creation paths below.
TEST_F(StringTest, AtTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> mutf8Source {'a', 'b', 'c', 'd', 'z', 0};
    String *current = String::CreateFromMUtf8(mutf8Source.data(), mutf8Source.size() - 1, GetLanguageContext(), vm);
    ASSERT_FALSE(current->IsUtf16());
    for (uint32_t pos = 0; pos < mutf8Source.size() - 1; pos++) {
        ASSERT_EQ(mutf8Source[pos], current->At(pos));
    }

    // utf16
    std::vector<uint16_t> utf16Source {'a', 'b', 0xab, 0xdc, 'z', 0};
    current = String::CreateFromUtf16(utf16Source.data(), utf16Source.size() - 1, GetLanguageContext(), vm);
    ASSERT_TRUE(current->IsUtf16());
    for (uint32_t pos = 0; pos < utf16Source.size() - 1; pos++) {
        ASSERT_EQ(utf16Source[pos], current->At(pos));
    }

    // utf16 -> utf8
    std::vector<uint16_t> narrowUtf16Source {'a', 'b', 121, 122, 'z', 0};
    current =
        String::CreateFromUtf16(narrowUtf16Source.data(), narrowUtf16Source.size() - 1, GetLanguageContext(), vm);
    ASSERT_FALSE(current->IsUtf16());
    for (uint32_t pos = 0; pos < narrowUtf16Source.size() - 1; pos++) {
        ASSERT_EQ(narrowUtf16Source[pos], current->At(pos));
    }
}
526
// IndexOf() must give the same answer regardless of whether the haystack and the pattern were created from
// MUTF-8 or from UTF-16 data. The pattern "bcd" occurs in "abcdz" only at index 1, so a search from
// position 1 must find it and a search from position 2 must not.
TEST_F(StringTest, IndexOfTest)
{
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'b', 'c', 'd', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'b', 'c', 'd', 0};
    String *string1 = String::CreateFromMUtf8(data1.data(), data1.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string2 = String::CreateFromMUtf8(data2.data(), data2.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string3 = String::CreateFromUtf16(data3.data(), data3.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string4 = String::CreateFromUtf16(data4.data(), data4.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());

    // Search starting at the position of the match.
    auto index = string1->IndexOf(string2, 1);
    auto index1 = string1->IndexOf(string4, 1);
    auto index2 = string3->IndexOf(string2, 1);
    auto index3 = string3->IndexOf(string4, 1);
    ASSERT_EQ(1, index);
    ASSERT_EQ(1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);

    // Search starting past the only match: nothing may be found.
    index = string1->IndexOf(string2, 2_I);
    index1 = string1->IndexOf(string4, 2_I);
    index2 = string3->IndexOf(string2, 2_I);
    index3 = string3->IndexOf(string4, 2_I);
    ASSERT_EQ(-1, index);
    ASSERT_EQ(-1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);
}
557
// Checks IndexOf() against fixed expected indices: repeated and overlapping patterns, negative and
// out-of-range start positions, and the empty string as either operand.
TEST_F(StringTest, IndexOfTest2)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Pattern "aba" inside "abacaba", plus empty-string cases.
    {
        std::vector<uint8_t> haystackBytes {'a', 'b', 'a', 'c', 'a', 'b', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'b', 'a', 0};
        String *haystack =
            String::CreateFromMUtf8(haystackBytes.data(), haystackBytes.size() - 1, GetLanguageContext(), vm);
        String *needle = String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(0, haystack->IndexOf(needle, -1));
        ASSERT_EQ(0, haystack->IndexOf(needle, 0));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 1));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 5_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 6_I));

        String *empty = String::CreateEmptyString(GetLanguageContext(), vm);
        ASSERT_EQ(-1, empty->IndexOf(haystack, 0));
        ASSERT_EQ(0, haystack->IndexOf(empty, -3_I));
        ASSERT_EQ(2_I, haystack->IndexOf(empty, 2_I));
        ASSERT_EQ(7_I, haystack->IndexOf(empty, 10_I));
    }

    // Pattern "def" inside "abcdefg".
    {
        std::vector<uint8_t> haystackBytes {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0};
        std::vector<uint8_t> needleBytes {'d', 'e', 'f', 0};
        String *haystack =
            String::CreateFromMUtf8(haystackBytes.data(), haystackBytes.size() - 1, GetLanguageContext(), vm);
        String *needle = String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(3_I, haystack->IndexOf(needle, 0));
    }

    // Pattern "aaa" inside "abaaaaa": overlapping matches.
    {
        std::vector<uint8_t> haystackBytes {'a', 'b', 'a', 'a', 'a', 'a', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'a', 'a', 0};
        String *haystack =
            String::CreateFromMUtf8(haystackBytes.data(), haystackBytes.size() - 1, GetLanguageContext(), vm);
        String *needle = String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(2_I, haystack->IndexOf(needle, 0));
        ASSERT_EQ(2_I, haystack->IndexOf(needle, 2_I));
        ASSERT_EQ(3_I, haystack->IndexOf(needle, 3_I));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 5_I));
    }
}
603
// Compare() ordering between strings that report IsUtf16() == false, and between those and one that reports true.
TEST_F(StringTest, CompareTestUtf8)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> abcdzBytes {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> abcdzxBytes {'a', 'b', 'c', 'd', 'z', 'x', 0};
    std::vector<uint16_t> abcdzUnits {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> abdczUnits {'a', 'b', 'd', 'c', 'z', 0};
    String *abcdz8 = String::CreateFromMUtf8(abcdzBytes.data(), abcdzBytes.size() - 1, GetLanguageContext(), vm);
    String *abcdzx8 = String::CreateFromMUtf8(abcdzxBytes.data(), abcdzxBytes.size() - 1, GetLanguageContext(), vm);
    String *abcdz16 = String::CreateFromUtf16(abcdzUnits.data(), abcdzUnits.size() - 1, GetLanguageContext(), vm);
    String *abdcz16 = String::CreateFromUtf16(abdczUnits.data(), abdczUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_FALSE(abcdz8->IsUtf16());
    ASSERT_FALSE(abcdzx8->IsUtf16());
    ASSERT_FALSE(abcdz16->IsUtf16());
    ASSERT_FALSE(abdcz16->IsUtf16());
    ASSERT_LT(abcdz8->Compare(abcdzx8), 0);
    ASSERT_GT(abcdzx8->Compare(abcdz8), 0);
    ASSERT_EQ(abcdz8->Compare(abcdz16), 0);
    ASSERT_EQ(abcdz16->Compare(abcdz8), 0);
    ASSERT_LT(abcdzx8->Compare(abdcz16), 0);
    ASSERT_GT(abdcz16->Compare(abcdzx8), 0);

    // utf8 vs utf16
    std::vector<uint16_t> wideUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wide16 = String::CreateFromUtf16(wideUnits.data(), wideUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_TRUE(wide16->IsUtf16());
    ASSERT_LT(abcdzx8->Compare(wide16), 0);
    ASSERT_GT(wide16->Compare(abcdzx8), 0);
    ASSERT_LT(abdcz16->Compare(wide16), 0);
    ASSERT_GT(wide16->Compare(abdcz16), 0);

    // compare with self
    ASSERT_EQ(abcdz8->Compare(abcdz8), 0);
    ASSERT_EQ(abcdzx8->Compare(abcdzx8), 0);
    ASSERT_EQ(abcdz16->Compare(abcdz16), 0);
    ASSERT_EQ(abdcz16->Compare(abdcz16), 0);
    ASSERT_EQ(wide16->Compare(wide16), 0);
}
647
// Compare() ordering between strings that all report IsUtf16() == true.
TEST_F(StringTest, CompareTestUtf16)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    std::vector<uint16_t> lowerUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *lower = String::CreateFromUtf16(lowerUnits.data(), lowerUnits.size() - 1, GetLanguageContext(), vm);
    // Two distinct string objects are created from the same data.
    std::vector<uint16_t> higherUnits {'a', 0xab, 0xab, 0};
    String *higher = String::CreateFromUtf16(higherUnits.data(), higherUnits.size() - 1, GetLanguageContext(), vm);
    String *higherTwin =
        String::CreateFromUtf16(higherUnits.data(), higherUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_TRUE(lower->IsUtf16());
    ASSERT_TRUE(higher->IsUtf16());
    ASSERT_TRUE(higherTwin->IsUtf16());

    ASSERT_LT(lower->Compare(higher), 0);
    ASSERT_GT(higher->Compare(lower), 0);
    ASSERT_EQ(higher->Compare(higherTwin), 0);
    ASSERT_EQ(higherTwin->Compare(higher), 0);

    // compare with self
    ASSERT_EQ(lower->Compare(lower), 0);
    ASSERT_EQ(higher->Compare(higher), 0);
    ASSERT_EQ(higherTwin->Compare(higherTwin), 0);
}
672
// Compare() on longer single-byte strings that share a 16-character prefix of 'a'.
TEST_F(StringTest, CompareTestLongUtf8)
{
    // long utf8 string vs long utf8 string
    // utf8

    // 16 x 'a'
    std::vector<uint8_t> plainBytes(16U, 'a');
    plainBytes.push_back(0);

    // 16 x 'a' + "xz"
    std::vector<uint8_t> xzBytes(16U, 'a');
    xzBytes.insert(xzBytes.end(), {'x', 'z'});
    xzBytes.push_back(0);

    // 16 x 'a' + "xxxyyaa" + 16 x 'a'
    std::vector<uint8_t> lowerBytes(16U, 'a');
    lowerBytes.insert(lowerBytes.end(), {'x', 'x', 'x', 'y', 'y', 'a', 'a'});
    lowerBytes.insert(lowerBytes.end(), 16U, 'a');
    lowerBytes.push_back(0);

    // 16 x 'a' + "xxxyyyy" + 16 x 'a'
    std::vector<uint8_t> higherBytes(16U, 'a');
    higherBytes.insert(higherBytes.end(), {'x', 'x', 'x', 'y', 'y', 'y', 'y'});
    higherBytes.insert(higherBytes.end(), 16U, 'a');
    higherBytes.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *plain = String::CreateFromMUtf8(plainBytes.data(), plainBytes.size() - 1, GetLanguageContext(), vm);
    String *xz = String::CreateFromMUtf8(xzBytes.data(), xzBytes.size() - 1, GetLanguageContext(), vm);
    String *lower = String::CreateFromMUtf8(lowerBytes.data(), lowerBytes.size() - 1, GetLanguageContext(), vm);
    String *higher = String::CreateFromMUtf8(higherBytes.data(), higherBytes.size() - 1, GetLanguageContext(), vm);
    String *plainTwin = String::CreateFromMUtf8(plainBytes.data(), plainBytes.size() - 1, GetLanguageContext(), vm);
    String *xzTwin = String::CreateFromMUtf8(xzBytes.data(), xzBytes.size() - 1, GetLanguageContext(), vm);

    // utf8 vs utf8
    ASSERT_EQ(plain->Compare(plainTwin), 0);
    ASSERT_EQ(plainTwin->Compare(plain), 0);
    ASSERT_EQ(xz->Compare(xzTwin), 0);
    ASSERT_EQ(xzTwin->Compare(xz), 0);
    ASSERT_LT(lower->Compare(higher), 0);
    ASSERT_GT(higher->Compare(lower), 0);
    ASSERT_LT(lower->Compare(xz), 0);
    ASSERT_GT(xz->Compare(lower), 0);
}
720
TEST_F(StringTest, CompareTestLongUtf16)
{
    // Compare() on UTF-16 strings that share a common prefix of sixteen 0xab code units.

    // Produces: 16 x 0xab, then `middle`, then `tailCount` x 0xab, then a terminating 0.
    auto makeBuffer = [](const std::vector<uint16_t> &middle, uint32_t tailCount) {
        std::vector<uint16_t> buffer(16U, 0xab);
        buffer.insert(buffer.end(), middle.begin(), middle.end());
        buffer.insert(buffer.end(), tailCount, 0xab);
        buffer.push_back(0);
        return buffer;
    };

    const std::vector<uint16_t> prefixOnly = makeBuffer({}, 0U);
    const std::vector<uint16_t> prefixShort = makeBuffer({'a', 0xbb}, 0U);
    const std::vector<uint16_t> lowerLong = makeBuffer({'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd}, 16U);
    const std::vector<uint16_t> upperLong = makeBuffer({'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd}, 16U);

    const LanguageContext ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The terminating 0 is excluded from the length passed to the runtime.
    auto makeString = [&ctx, vm](const std::vector<uint16_t> &utf16) {
        return String::CreateFromUtf16(utf16.data(), utf16.size() - 1, ctx, vm);
    };

    String *string14 = makeString(prefixOnly);
    String *string15 = makeString(prefixShort);
    String *string16 = makeString(lowerLong);
    String *string17 = makeString(upperLong);
    // Separate String objects created from the same code units as string14 / string15.
    String *string18 = makeString(prefixOnly);
    String *string19 = makeString(prefixShort);

    // Same content: equal in both directions.
    ASSERT_EQ(string14->Compare(string18), 0);
    ASSERT_EQ(string18->Compare(string14), 0);
    ASSERT_EQ(string15->Compare(string19), 0);
    ASSERT_EQ(string19->Compare(string15), 0);
    // Same length, first difference at 0xcc vs 0xdd.
    ASSERT_LT(string16->Compare(string17), 0);
    ASSERT_GT(string17->Compare(string16), 0);
    // Different length, first difference at 'a' vs 0xbb.
    ASSERT_LT(string16->Compare(string15), 0);
    ASSERT_GT(string15->Compare(string16), 0);
}
768
TEST_F(StringTest, ConcatTest)
{
    // Checks String::Concat for utf8 + utf8, utf8 + utf16 and utf16 + utf16 operands by comparing
    // the result with a string created directly from the expected content.
    const LanguageContext ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Both helpers drop the trailing 0 of the buffer from the length passed to the runtime.
    auto fromMUtf8 = [&ctx, vm](const std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, ctx, vm);
    };
    auto fromUtf16 = [&ctx, vm](const std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, ctx, vm);
    };

    // utf8 + utf8
    const std::vector<uint8_t> head8 {'f', 'g', 'h', 0};
    const std::vector<uint8_t> tail8 {'a', 'b', 'c', 'd', 'e', 0};
    // Expected content: head8 without its terminator followed by tail8 including its terminator.
    std::vector<uint8_t> joined8(head8.begin(), head8.end() - 1);
    joined8.insert(joined8.end(), tail8.begin(), tail8.end());

    String *string1 = fromMUtf8(head8);
    String *string2 = fromMUtf8(tail8);
    String *string30 = fromMUtf8(joined8);
    ASSERT_FALSE(string1->IsUtf16());
    ASSERT_FALSE(string2->IsUtf16());
    String *string31 = String::Concat(string1, string2, ctx, vm);
    ASSERT_EQ(string30->Compare(string31), 0);
    ASSERT_EQ(string31->Compare(string30), 0);

    // utf8 + utf16
    const std::vector<uint16_t> tail16 {'a', 'b', 0xab, 0xdc, 'z', 0};
    const std::vector<uint16_t> mixed16 {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};  // head8 + tail16
    String *string4 = fromUtf16(tail16);
    String *string50 = fromUtf16(mixed16);
    String *string51 = String::Concat(string1, string4, ctx, vm);
    ASSERT_EQ(string50->GetLength(), string51->GetLength());
    ASSERT_EQ(string50->Compare(string51), 0);
    ASSERT_EQ(string51->Compare(string50), 0);

    // utf16 + utf16
    // Expected content: tail16 without its terminator followed by mixed16 including its terminator.
    std::vector<uint16_t> joined16(tail16.begin(), tail16.end() - 1);
    joined16.insert(joined16.end(), mixed16.begin(), mixed16.end());
    String *string60 = fromUtf16(joined16);
    String *string61 = String::Concat(string4, string50, ctx, vm);
    ASSERT_EQ(string60->Compare(string61), 0);
    ASSERT_EQ(string61->Compare(string60), 0);
}
812
TEST_F(StringTest, DoReplaceTest0)
{
    // Replacing 'Z' with 'A' in "ZBCDEFGHIJ" must produce a string equal to "ABCDEFGHIJ".
    static constexpr uint32_t STRING_LENGTH = 10;

    // The buffers are owned by vectors so they are released even when a fatal assertion returns
    // early from the test body. Elements are value-initialized to 0, so the extra last element
    // is already the string terminator.
    std::vector<uint8_t> sourceText(STRING_LENGTH + 1);
    std::vector<uint8_t> expectedText(STRING_LENGTH + 1);
    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        const auto letter = static_cast<uint8_t>('A' + i);
        sourceText[i] = letter;
        expectedText[i] = letter;
    }
    // Only the first character differs between the input and the expected result.
    sourceText[0] = 'Z';

    String *fStringS =
        String::CreateFromMUtf8(sourceText.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *sStringS =
        String::CreateFromMUtf8(expectedText.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *tStringS = String::DoReplace(fStringS, 'Z', 'A', GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(tStringS, sStringS));
}
842
TEST_F(StringTest, FastSubstringTest0)
{
    // FastSubString(start = 1, length = 5) of "ABCDEFGHIJ" must produce a string equal to "BCDEF".
    const uint32_t stringLength = 10;
    const uint32_t subStringLength = 5;
    const uint32_t subStringStart = 1;

    // The buffers are owned by vectors so they are released even when a fatal assertion returns
    // early from the test body. Elements are value-initialized to 0, so the extra last element
    // of each buffer is already the string terminator.
    std::vector<uint8_t> fullText(stringLength + 1);
    for (uint32_t i = 0; i < stringLength; i++) {
        fullText[i] = static_cast<uint8_t>('A' + i);
    }

    // Reference substring copied by hand from the full text.
    std::vector<uint8_t> expectedText(subStringLength + 1);
    for (uint32_t j = 0; j < subStringLength; j++) {
        expectedText[j] = fullText[subStringStart + j];
    }

    String *fStringS =
        String::CreateFromMUtf8(fullText.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *sStringS =
        String::CreateFromMUtf8(expectedText.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *tStringS = String::FastSubString(fStringS, subStringStart, subStringLength, GetLanguageContext(),
                                             Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(tStringS, sStringS));
}
875
TEST_F(StringTest, ToCharArray)
{
    // ToCharArray() must return an array with one 16-bit element per character of the string.
    // The length is asserted explicitly: a loop bounded only by the returned array's length would
    // pass vacuously for an empty array and would index past the reference data for a longer one.

    // utf8
    std::vector<uint8_t> data {'a', 'b', 'c', 'd', 'e', 0};
    const auto utf8Length = static_cast<uint32_t>(data.size() - 1);  // trailing 0 is not part of the string
    String *utf8String =
        String::CreateFromMUtf8(data.data(), utf8Length, GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    Array *newArray = utf8String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray, nullptr);
    ASSERT_EQ(newArray->GetLength(), utf8Length);
    for (uint32_t i = 0; i < utf8Length; ++i) {
        ASSERT_EQ(data[i], newArray->Get<uint16_t>(i));
    }

    // utf16
    std::vector<uint16_t> data1 {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    const auto utf16Length = static_cast<uint32_t>(data1.size() - 1);  // trailing 0 is not part of the string
    String *utf16String =
        String::CreateFromUtf16(data1.data(), utf16Length, GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    Array *newArray1 = utf16String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray1, nullptr);
    ASSERT_EQ(newArray1->GetLength(), utf16Length);
    for (uint32_t i = 0; i < utf16Length; ++i) {
        ASSERT_EQ(data1[i], newArray1->Get<uint16_t>(i));
    }
}
895
TEST_F(StringTest, CreateNewStingFromCharArray)
{
    // A string built from a slice (offset 1, length 5) of a char array must equal a string
    // created directly from the same slice of the source code units.
    const LanguageContext ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    std::vector<uint16_t> sourceUnits {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *sourceString = String::CreateFromUtf16(sourceUnits.data(), sourceUnits.size() - 1, ctx, vm);
    Array *charArray = sourceString->ToCharArray(ctx);

    uint32_t charArrayLength = 5;
    uint32_t charArrayOffset = 1;

    // Reference slice: the selected code units followed by a terminating 0.
    std::vector<uint16_t> sliceUnits;
    sliceUnits.reserve(charArrayLength + 1);
    for (uint32_t idx = charArrayOffset; idx < charArrayOffset + charArrayLength; ++idx) {
        sliceUnits.push_back(sourceUnits[idx]);
    }
    sliceUnits.push_back(0);
    String *expected = String::CreateFromUtf16(sliceUnits.data(), sliceUnits.size() - 1, ctx, vm);

    String *result = String::CreateNewStringFromChars(charArrayOffset, charArrayLength, charArray, ctx, vm);

    ASSERT_EQ(String::StringsAreEqual(result, expected), true);
}
918
TEST_F(StringTest, CreateNewStingFromByteArray)
{
    // A string built from a slice (offset 1, length 5) of a byte array, with high byte 0, must
    // equal a UTF-16 string whose code units are (highByte << 8) + byte for the same slice.
    const LanguageContext ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    std::vector<uint8_t> data {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    uint32_t byteArrayLength = 5;
    uint32_t byteArrayOffset = 1;
    uint32_t highByte = 0;

    // Reference code units; no terminator is needed because the length is passed explicitly.
    std::vector<uint16_t> expectedUnits;
    expectedUnits.reserve(byteArrayLength);
    for (uint32_t idx = byteArrayOffset; idx < byteArrayOffset + byteArrayLength; ++idx) {
        expectedUnits.push_back(static_cast<uint16_t>((highByte << 8U) + (data[idx] & 0xFFU)));
    }
    String *string1 = String::CreateFromUtf16(expectedUnits.data(), byteArrayLength, ctx, vm);

    // Copy every byte except the trailing 0 into an ARRAY_I8 instance.
    Class *klass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ark::ClassRoot::ARRAY_I8);
    Array *byteArray = Array::Create(klass, data.size() - 1);
    for (uint32_t i = 0; i < data.size() - 1; i++) {
        byteArray->Set<uint8_t>(i, data[i]);
    }

    String *result = String::CreateNewStringFromBytes(byteArrayOffset, byteArrayLength, highByte, byteArray, ctx, vm);

    ASSERT_EQ(String::StringsAreEqual(result, string1), true);
}
947
948 } // namespace ark::coretypes::test
949
950 // NOLINTEND(readability-magic-numbers)
951