• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <ctime>
17 
18 #include "gtest/gtest.h"
19 #include "libpandabase/utils/span.h"
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/utils.h"
22 #include "runtime/include/class_linker_extension.h"
23 #include "runtime/include/coretypes/array-inl.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/include/thread.h"
27 
28 // NOLINTBEGIN(readability-magic-numbers)
29 
30 namespace ark::coretypes::test {
31 
32 class StringTest : public testing::Test {
33 public:
StringTest()34     StringTest()
35     {
36 #ifdef PANDA_NIGHTLY_TEST_ON
37         seed_ = std::time(NULL);
38 #else
39         seed_ = 0xDEADBEEF;
40 #endif
41         srand(seed_);
42         // We need to create a runtime instance to be able to create strings.
43         options_.SetShouldLoadBootPandaFiles(false);
44         options_.SetShouldInitializeIntrinsics(false);
45         Runtime::Create(options_);
46     }
47 
~StringTest()48     ~StringTest() override
49     {
50         Runtime::Destroy();
51     }
52 
53     NO_COPY_SEMANTIC(StringTest);
54     NO_MOVE_SEMANTIC(StringTest);
55 
GetLanguageContext()56     LanguageContext GetLanguageContext()
57     {
58         return Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
59     }
60 
SetUp()61     void SetUp() override
62     {
63         thread_ = ark::MTManagedThread::GetCurrent();
64         thread_->ManagedCodeBegin();
65     }
66 
TearDown()67     void TearDown() override
68     {
69         thread_->ManagedCodeEnd();
70     }
71 
72 protected:
73     static constexpr uint32_t SIMPLE_UTF8_STRING_LENGTH = 13;
74     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
75     static constexpr char SIMPLE_UTF8_STRING[SIMPLE_UTF8_STRING_LENGTH + 1] = "Hello, world!";
76 
77 private:
78     ark::MTManagedThread *thread_ {};
79     unsigned seed_ {};
80     RuntimeOptions options_;
81 };
82 
// A string built from single-byte MUTF-8 data must compare equal to that same raw data.
TEST_F(StringTest, EqualStringWithCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0x01, 0x05, 0x07, 0x00};
    // The trailing zero byte is excluded from the length.
    uint32_t charCount = rawBytes.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
91 
// A string built from MUTF-8 data containing a two-byte sequence must compare equal to that raw data.
TEST_F(StringTest, EqualStringWithNotCompressedRawUtf8Data)
{
    // Leading two-byte sequence, followed by 20 single-byte values starting at 0x30.
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < 20U; ++offset) {
        rawBytes.push_back(0x30 + offset);
    }
    rawBytes.push_back(0);

    // Two bytes are subtracted from the byte count to get the length passed to the string API.
    uint32_t charCount = rawBytes.size() - 2U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
106 
// Two MUTF-8 inputs of equal length that differ in their last character must not compare equal.
TEST_F(StringTest, NotEqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> storedBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> probeBytes {0xc2, 0xa7, 0x34, 0x00};
    uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromMUtf8(storedBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(stored, probeBytes.data(), charCount));
}
116 
// A string created from data with a two-byte sequence must differ from single-byte raw data.
TEST_F(StringTest, NotEqualStringNotCompressedStringWithCompressedRawData)
{
    std::vector<uint8_t> storedBytes {0xc2, 0xa7, 0x33, 0x00};
    uint32_t storedLength = 2;
    std::vector<uint8_t> probeBytes {0x02, 0x07, 0x04, 0x00};
    uint32_t probeLength = 3;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromMUtf8(storedBytes.data(), storedLength, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(stored, probeBytes.data(), probeLength));
}
127 
// A string created from single-byte data must differ from raw data containing a two-byte sequence.
TEST_F(StringTest, NotEqualCompressedStringWithUncompressedRawUtf8Data)
{
    std::vector<uint8_t> storedBytes {0x02, 0x07, 0x04, 0x00};
    uint32_t storedLength = 3;
    std::vector<uint8_t> probeBytes {0xc2, 0xa7, 0x33, 0x00};
    uint32_t probeLength = 2;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromMUtf8(storedBytes.data(), storedLength, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(stored, probeBytes.data(), probeLength));
}
138 
// Raw MUTF-8 data that is one character shorter than the string must not compare equal to it.
TEST_F(StringTest, EqualStringWithMUtf8DifferentLength)
{
    std::vector<uint8_t> longerBytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> shorterBytes {0xc2, 0xa7, 0x00};
    uint32_t longerLength = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromMUtf8(longerBytes.data(), longerLength, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(stored, shorterBytes.data(), longerLength - 1));
}
148 
// A string created from UTF-16 code units must compare equal to those same code units.
TEST_F(StringTest, EqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The whole vector, including the final zero unit, is used as string content.
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    const uint16_t *rawUnits = units.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, units.size()));
}
157 
// A string created from small UTF-16 values (1..30) must compare equal to the raw UTF-16 input.
TEST_F(StringTest, CompareCompressedStringWithRawUtf16)
{
    std::vector<uint16_t> units;
    for (size_t value = 1; value <= 30U; ++value) {
        units.push_back(value);
    }
    units.push_back(0);

    // The final zero unit is excluded from the string content.
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    const uint16_t *rawUnits = units.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUnits, units.size() - 1));
}
172 
// Raw UTF-16 data that is longer than the string must not compare equal to it.
TEST_F(StringTest, EqualStringWithRawUtf16DifferentLength)
{
    std::vector<uint16_t> storedUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> probeUnits {0xffc3, 0x33, 0x55, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromUtf16(storedUnits.data(), storedUnits.size(), GetLanguageContext(), vm);
    const uint16_t *rawProbe = probeUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(stored, rawProbe, probeUnits.size()));
}
182 
// UTF-16 inputs of equal length that differ in one code unit must not compare equal.
TEST_F(StringTest, NotEqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> storedUnits {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> probeUnits {0xffc3, 0x34, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *stored = String::CreateFromUtf16(storedUnits.data(), storedUnits.size(), GetLanguageContext(), vm);

    const uint16_t *rawProbe = probeUnits.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(stored, rawProbe, probeUnits.size()));
}
193 
// The hash code of a string must match the hash computed directly from its MUTF-8 source bytes.
TEST_F(StringTest, compressedHashCodeUtf8)
{
    const auto *sourceBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(sourceBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);

    auto hashFromString = created->GetHashcode();
    auto hashFromBytes = String::ComputeHashcodeMutf8(sourceBytes, SIMPLE_UTF8_STRING_LENGTH);

    ASSERT_EQ(hashFromString, hashFromBytes);
}
// Same hash check as above, for MUTF-8 data that starts with a two-byte sequence.
TEST_F(StringTest, notCompressedHashCodeUtf8)
{
    static constexpr size_t TAIL_LENGTH = 20U;
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < TAIL_LENGTH; ++offset) {
        rawBytes.push_back(0x30 + offset);
    }
    rawBytes.push_back(0);

    // One character for the leading two-byte sequence plus one per appended byte.
    size_t charCount = 1 + TAIL_LENGTH;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromBytes = String::ComputeHashcodeMutf8(rawBytes.data(), charCount);

    ASSERT_EQ(hashFromString, hashFromBytes);
}
223 
// The hash code of a string built from small UTF-16 values must match the hash of the raw UTF-16 data.
TEST_F(StringTest, compressedHashCodeUtf16)
{
    std::vector<uint16_t> units;
    for (size_t value = 1; value <= 30U; ++value) {
        units.push_back(value);
    }
    units.push_back(0);

    // Both the string and the raw hash cover the whole vector, including the final zero unit.
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromUnits = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hashFromString, hashFromUnits);
}
240 
// Same hash check for UTF-16 data containing the value 0xffc3.
TEST_F(StringTest, notCompressedHashCodeUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromUnits = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hashFromString, hashFromUnits);
}
250 
// GetLength() must report the length the string was created with from MUTF-8 data.
TEST_F(StringTest, lengthUtf8)
{
    const auto *sourceBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(sourceBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), SIMPLE_UTF8_STRING_LENGTH);
}
258 
// GetLength() must report the number of UTF-16 code units the string was created from.
TEST_F(StringTest, lengthUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), units.size());
}
266 
// A string and a proper prefix of it must not compare equal.
TEST_F(StringTest, DifferentLengthStringCompareTest)
{
    static constexpr uint32_t F_STRING_LENGTH = 8;
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr char F_STRING[F_STRING_LENGTH + 1] = "Hello, w";

    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const auto *fullBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    String *fullString = String::CreateFromMUtf8(fullBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(fullString->GetLength(), SIMPLE_UTF8_STRING_LENGTH);

    const auto *prefixBytes = reinterpret_cast<const uint8_t *>(F_STRING);
    String *prefixString = String::CreateFromMUtf8(prefixBytes, F_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(prefixString->GetLength(), F_STRING_LENGTH);

    ASSERT_EQ(String::StringsAreEqual(fullString, prefixString), false);
}
281 
// Length queries and copy-out for data whose last character is encoded as 0xc0 0x80.
TEST_F(StringTest, ForeignLengthAndCopyTest1b0)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0xc0, 0x80, 0x00};
    uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // c080 is U+0000
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 2U);  // the terminating \0 is not counted for UTF-16

    // Copy out as MUTF-8 and expect the original bytes back.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copy out as UTF-16 and compare with the expected code units.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x00};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
298 
// Length queries and copy-out for data made only of single-byte characters.
TEST_F(StringTest, ForeignLengthAndCopyTest1b)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0x7f, 0x00};
    uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 1, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 1);  // the terminating \0 is not counted for UTF-16

    // Copy out as MUTF-8 and expect the original bytes back.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copy out as UTF-16 and compare with the expected code units.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x7f};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
315 
// Length queries and copy-out for data that starts with a two-byte sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest2b)
{
    std::vector<uint8_t> mutf8 {0xc2, 0xa7, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // the terminating \0 is not counted for UTF-16

    // Copy out as MUTF-8 and expect the original bytes back.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copy out as UTF-16: 0xc2 0xa7 is expected to become the single unit 0xa7.
    std::vector<uint16_t> expected16 {0xa7, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
331 
// Length queries and copy-out for data that starts with a three-byte sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest3b)
{
    std::vector<uint8_t> mutf8 {0xef, 0xbf, 0x83, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // the terminating \0 is not counted for UTF-16

    // Copy out as MUTF-8 and expect the original bytes back.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copy out as UTF-16: 0xef 0xbf 0x83 is expected to become the single unit 0xffc3.
    std::vector<uint16_t> expected16 {0xffc3, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
347 
// Length queries and copy-out for a surrogate pair given as two three-byte sequences.
TEST_F(StringTest, ForeignLengthAndCopyTest6b)
{
    std::vector<uint8_t> mutf8 {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x20, 0x00};  // UTF-16 size is 3
    // We support 4-byte utf-8 sequences, so {0xd801, 0xdc37} is encoded to 4 bytes instead of 6
    std::vector<uint8_t> expected8 {0xf0, 0x90, 0x90, 0xb7, 0x20, 0x00};

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 3U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), expected8.size());
    ASSERT_EQ(created->GetUtf16Length(), 3U);  // the terminating \0 is not counted for UTF-16

    // NOTE(review): unlike the sibling tests, the return value of CopyDataMUtf8 is not checked here.
    std::vector<uint8_t> copied8(expected8.size());
    created->CopyDataMUtf8(copied8.data(), copied8.size(), true);
    ASSERT_EQ(copied8, expected8);

    // Copy out as UTF-16 and expect the surrogate pair followed by the space.
    std::vector<uint16_t> expected16 {0xd801, 0xdc37, 0x20};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
365 
// Copies the region "cd" out of "abcdz" both as MUTF-8 and as UTF-16.
TEST_F(StringTest, RegionCopyTestMutf8)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0x00};
    uint32_t charCount = mutf8.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), charCount, GetLanguageContext(), vm);
    size_t regionStart = 2;

    // MUTF-8 region copy; the last output byte is set to zero by the test itself.
    size_t mutf8Length = created->GetMUtf8Length();
    std::vector<uint8_t> expected8 = {'c', 'd', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, mutf8Length - regionStart - 1 - 1,
                                           copied8.size()),
              copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy of the same characters.
    size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> expected16 = {'c', 'd'};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, utf16Length - regionStart - 1,
                                           copied16.size()),
              copied16.size());
    ASSERT_EQ(copied16, expected16);
}
385 
// Copies the region "cdz" out of a string that ends with a two-byte MUTF-8 sequence.
TEST_F(StringTest, RegionCopyTestUtf16)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0xc2, 0xa7, 0x00};
    uint32_t charCount = mutf8.size() - 1 - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), charCount, GetLanguageContext(), vm);
    size_t regionStart = 2;

    // MUTF-8 region copy; the last output byte is set to zero by the test itself.
    std::vector<uint8_t> expected8 = {'c', 'd', 'z', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, 3U, copied8.size()), copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region copy; the output buffer is sized from the string's UTF-16 length.
    size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> copied16(utf16Length - regionStart - 1);
    std::vector<uint16_t> expected16 = {'c', 'd', 'z'};
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, 3U, copied16.size()), copied16.size());
    ASSERT_EQ(copied16, expected16);
}
404 
TEST_F(StringTest,SameLengthStringCompareTest)405 TEST_F(StringTest, SameLengthStringCompareTest)
406 {
407     static constexpr uint32_t STRING_LENGTH = 10;
408     char *fString = new char[STRING_LENGTH + 1];
409     char *sString = new char[STRING_LENGTH + 1];
410 
411     for (uint32_t i = 0; i < STRING_LENGTH; i++) {
412         // Hack for ConvertMUtf8ToUtf16 call.
413         // We should use char from 0x7f to 0x0 if we want to
414         // generate one utf16 (0x00xx) from this mutf8.
415         // NOLINTNEXTLINE(cert-msc50-cpp)
416         uint8_t val1 = rand();
417         val1 = val1 >> 1U;
418         if (val1 == 0) {
419             val1++;
420         }
421 
422         // NOLINTNEXTLINE(cert-msc50-cpp)
423         uint8_t val2 = rand();
424         val2 = val2 >> 1U;
425         if (val2 == 0) {
426             val2++;
427         }
428 
429         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
430         fString[i] = val1;
431         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
432         sString[i] = val2;
433     }
434     // Set the last elements in strings with size more than 0x8 to disable compressing.
435     // This will leads to count two MUtf-8 bytes as one UTF-16 so length = string_length - 1
436     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
437     fString[STRING_LENGTH - 2U] = uint8_t(0x80);
438     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
439     sString[STRING_LENGTH - 2U] = uint8_t(0x80);
440     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
441     fString[STRING_LENGTH - 1] = uint8_t(0x01);
442     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
443     sString[STRING_LENGTH - 1] = uint8_t(0x01);
444     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
445     fString[STRING_LENGTH] = '\0';
446     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
447     sString[STRING_LENGTH] = '\0';
448 
449     String *firstUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString), STRING_LENGTH - 1,
450                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
451     // Try to use function with automatic length detection
452     String *secondUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString),
453                                                         GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
454     ASSERT_EQ(firstUtf16String->GetLength(), STRING_LENGTH - 1);
455     ASSERT_EQ(secondUtf16String->GetLength(), STRING_LENGTH - 1);
456 
457     // Dirty hack to not create utf16 for our purpose, just reuse old one
458     // Try to create compressed strings.
459     String *firstUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
460                                                       GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
461     String *secondUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
462                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
463     ASSERT_EQ(firstUtf8String->GetLength(), STRING_LENGTH - 1);
464     ASSERT_EQ(secondUtf8String->GetLength(), STRING_LENGTH - 1);
465 
466     ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf16String), strcmp(fString, sString) == 0);
467     ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf8String),
468               firstUtf16String->IsUtf16() == secondUtf8String->IsUtf16());
469     ASSERT_EQ(String::StringsAreEqual(firstUtf8String, secondUtf8String), true);
470     ASSERT_TRUE(firstUtf16String->IsUtf16());
471     ASSERT_TRUE(String::StringsAreEqualUtf16(firstUtf16String, firstUtf16String->GetDataUtf16(),
472                                              firstUtf16String->GetLength()));
473 
474     delete[] fString;
475     delete[] sString;
476 }
477 
// ObjectSize() must agree with the size computed by ComputeSizeMUtf8 / ComputeSizeUtf16.
TEST_F(StringTest, ObjectSize)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Single-byte data: compared against ComputeSizeMUtf8.
    {
        std::vector<uint8_t> digits {'1', '2', '3', '4', '5', 0x00};
        uint32_t charCount = digits.size() - 1;
        String *created = String::CreateFromMUtf8(digits.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeMUtf8(charCount));
    }

    // Data containing 0x80 bytes: compared against ComputeSizeUtf16.
    {
        std::vector<uint8_t> wideBytes {0x80, 0x01, 0x80, 0x02, 0x00};
        uint32_t charCount = wideBytes.size() / 2U;
        String *created = String::CreateFromMUtf8(wideBytes.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeUtf16(charCount));
    }
}
496 
// At(i) must return the i-th input element for each of the three construction paths below.
TEST_F(StringTest, AtTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> narrowInput {'a', 'b', 'c', 'd', 'z', 0};
    String *narrow = String::CreateFromMUtf8(narrowInput.data(), narrowInput.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, narrow->IsUtf16());
    for (uint32_t pos = 0; pos < narrowInput.size() - 1; ++pos) {
        ASSERT_EQ(narrowInput[pos], narrow->At(pos));
    }

    // utf16
    std::vector<uint16_t> wideInput {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wide = String::CreateFromUtf16(wideInput.data(), wideInput.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(true, wide->IsUtf16());
    for (uint32_t pos = 0; pos < wideInput.size() - 1; ++pos) {
        ASSERT_EQ(wideInput[pos], wide->At(pos));
    }

    // utf16 -> utf8
    std::vector<uint16_t> asciiInput {'a', 'b', 121, 122, 'z', 0};
    String *narrowed = String::CreateFromUtf16(asciiInput.data(), asciiInput.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, narrowed->IsUtf16());
    for (uint32_t pos = 0; pos < asciiInput.size() - 1; ++pos) {
        ASSERT_EQ(asciiInput[pos], narrowed->At(pos));
    }
}
526 
// IndexOf must give the same result whether the text and the pattern were created
// from MUTF-8 or from UTF-16 input, and that result must be the expected position.
TEST_F(StringTest, IndexOfTest)
{
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'b', 'c', 'd', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'b', 'c', 'd', 0};
    String *string1 = String::CreateFromMUtf8(data1.data(), data1.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string2 = String::CreateFromMUtf8(data2.data(), data2.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string3 = String::CreateFromUtf16(data3.data(), data3.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string4 = String::CreateFromUtf16(data4.data(), data4.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());

    // Searching from position 1: "bcd" starts at index 1 of "abcdz".
    auto index = string1->IndexOf(string2, 1);
    auto index1 = string1->IndexOf(string4, 1);
    auto index2 = string3->IndexOf(string2, 1);
    auto index3 = string3->IndexOf(string4, 1);
    // Check the actual value instead of only printing it.
    ASSERT_EQ(1, index);
    ASSERT_EQ(1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);

    // Searching from position 2: there is no further occurrence of "bcd".
    index = string1->IndexOf(string2, 2_I);
    index1 = string1->IndexOf(string4, 2_I);
    index2 = string3->IndexOf(string2, 2_I);
    index3 = string3->IndexOf(string4, 2_I);
    ASSERT_EQ(-1, index);
    ASSERT_EQ(-1, index1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);
}
557 
// IndexOf with various start positions, including negative and out-of-range ones,
// and with empty strings on either side.
TEST_F(StringTest, IndexOfTest2)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Pattern occurs twice: at the beginning and at index 4.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'a', 'c', 'a', 'b', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'b', 'a', 0};
        String *text = String::CreateFromMUtf8(textBytes.data(), textBytes.size() - 1, GetLanguageContext(), vm);
        String *needle =
            String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(0, text->IndexOf(needle, -1));
        ASSERT_EQ(0, text->IndexOf(needle, 0));
        ASSERT_EQ(4_I, text->IndexOf(needle, 1));
        ASSERT_EQ(4_I, text->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 5_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 6_I));

        // Empty string as the text and as the pattern.
        String *empty = String::CreateEmptyString(GetLanguageContext(), vm);
        ASSERT_EQ(-1, empty->IndexOf(text, 0));
        ASSERT_EQ(0, text->IndexOf(empty, -3_I));
        ASSERT_EQ(2_I, text->IndexOf(empty, 2_I));
        ASSERT_EQ(7_I, text->IndexOf(empty, 10_I));
    }

    // Pattern occurs once in the middle.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0};
        std::vector<uint8_t> needleBytes {'d', 'e', 'f', 0};
        String *text = String::CreateFromMUtf8(textBytes.data(), textBytes.size() - 1, GetLanguageContext(), vm);
        String *needle =
            String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(3_I, text->IndexOf(needle, 0));
    }

    // Overlapping occurrences of the pattern.
    {
        std::vector<uint8_t> textBytes {'a', 'b', 'a', 'a', 'a', 'a', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'a', 'a', 0};
        String *text = String::CreateFromMUtf8(textBytes.data(), textBytes.size() - 1, GetLanguageContext(), vm);
        String *needle =
            String::CreateFromMUtf8(needleBytes.data(), needleBytes.size() - 1, GetLanguageContext(), vm);
        ASSERT_EQ(2_I, text->IndexOf(needle, 0));
        ASSERT_EQ(2_I, text->IndexOf(needle, 2_I));
        ASSERT_EQ(3_I, text->IndexOf(needle, 3_I));
        ASSERT_EQ(4_I, text->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, text->IndexOf(needle, 5_I));
    }
}
603 
// Compare() between strings that are not UTF-16, then against a UTF-16 string, then with themselves.
TEST_F(StringTest, CompareTestUtf8)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> baseBytes {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> longerBytes {'a', 'b', 'c', 'd', 'z', 'x', 0};
    std::vector<uint16_t> baseUnits {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> swappedUnits {'a', 'b', 'd', 'c', 'z', 0};
    String *base = String::CreateFromMUtf8(baseBytes.data(), baseBytes.size() - 1, GetLanguageContext(), vm);
    String *longer = String::CreateFromMUtf8(longerBytes.data(), longerBytes.size() - 1, GetLanguageContext(), vm);
    String *baseFrom16 = String::CreateFromUtf16(baseUnits.data(), baseUnits.size() - 1, GetLanguageContext(), vm);
    String *swapped =
        String::CreateFromUtf16(swappedUnits.data(), swappedUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, base->IsUtf16());
    ASSERT_EQ(false, longer->IsUtf16());
    ASSERT_EQ(false, baseFrom16->IsUtf16());
    ASSERT_EQ(false, swapped->IsUtf16());
    ASSERT_LT(base->Compare(longer), 0);
    ASSERT_GT(longer->Compare(base), 0);
    ASSERT_EQ(base->Compare(baseFrom16), 0);
    ASSERT_EQ(baseFrom16->Compare(base), 0);
    ASSERT_LT(longer->Compare(swapped), 0);
    ASSERT_GT(swapped->Compare(longer), 0);

    // utf8 vs utf16
    std::vector<uint16_t> wideUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wide = String::CreateFromUtf16(wideUnits.data(), wideUnits.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(true, wide->IsUtf16());
    ASSERT_LT(longer->Compare(wide), 0);
    ASSERT_GT(wide->Compare(longer), 0);
    ASSERT_LT(swapped->Compare(wide), 0);
    ASSERT_GT(wide->Compare(swapped), 0);

    // compare with self
    ASSERT_EQ(base->Compare(base), 0);
    ASSERT_EQ(longer->Compare(longer), 0);
    ASSERT_EQ(baseFrom16->Compare(baseFrom16), 0);
    ASSERT_EQ(swapped->Compare(swapped), 0);
    ASSERT_EQ(wide->Compare(wide), 0);
}
647 
// Exercises String::Compare between strings that all report IsUtf16() == true:
// two different contents, two separately created strings with the same content, and self-comparison.
TEST_F(StringTest, CompareTestUtf16)
{
    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing 0 of each buffer is not part of the created string.
    auto makeFromUtf16 = [&ctx, vm](const std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, ctx, vm);
    };

    const std::vector<uint16_t> lowerUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    const std::vector<uint16_t> higherUnits {'a', 0xab, 0xab, 0};

    String *lowerStr = makeFromUtf16(lowerUnits);
    String *higherStr = makeFromUtf16(higherUnits);
    // Second, independent string object with the same content as higherStr.
    String *higherTwin = makeFromUtf16(higherUnits);

    ASSERT_TRUE(lowerStr->IsUtf16());
    ASSERT_TRUE(higherStr->IsUtf16());
    ASSERT_TRUE(higherTwin->IsUtf16());

    // First difference is at index 1: 'b' < 0xab.
    ASSERT_LT(lowerStr->Compare(higherStr), 0);
    ASSERT_GT(higherStr->Compare(lowerStr), 0);
    ASSERT_EQ(higherStr->Compare(higherTwin), 0);
    ASSERT_EQ(higherTwin->Compare(higherStr), 0);

    // compare with self
    ASSERT_EQ(lowerStr->Compare(lowerStr), 0);
    ASSERT_EQ(higherStr->Compare(higherStr), 0);
    ASSERT_EQ(higherTwin->Compare(higherTwin), 0);
}
672 
// Exercises String::Compare on longer strings created with CreateFromMUtf8. Every string
// starts with the same run of 16 'a' characters, so differences only appear after that run.
TEST_F(StringTest, CompareTestLongUtf8)
{
    static constexpr size_t RUN_LENGTH = 16U;
    // Builds: RUN_LENGTH x 'a', then `middle`, then `tailRun` x 'a', then a terminating 0.
    auto buildBytes = [](const std::vector<uint8_t> &middle, size_t tailRun) {
        std::vector<uint8_t> bytes(RUN_LENGTH, 'a');
        bytes.insert(bytes.end(), middle.begin(), middle.end());
        bytes.insert(bytes.end(), tailRun, 'a');
        bytes.push_back(0);
        return bytes;
    };

    const std::vector<uint8_t> plainBytes = buildBytes({}, 0);
    const std::vector<uint8_t> shortTailBytes = buildBytes({'x', 'z'}, 0);
    const std::vector<uint8_t> lowMiddleBytes = buildBytes({'x', 'x', 'x', 'y', 'y', 'a', 'a'}, RUN_LENGTH);
    const std::vector<uint8_t> highMiddleBytes = buildBytes({'x', 'x', 'x', 'y', 'y', 'y', 'y'}, RUN_LENGTH);

    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The terminating 0 is excluded from the length handed to the factory.
    auto makeFromMUtf8 = [&ctx, vm](const std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, ctx, vm);
    };

    String *plainStr = makeFromMUtf8(plainBytes);
    String *shortTailStr = makeFromMUtf8(shortTailBytes);
    String *lowMiddleStr = makeFromMUtf8(lowMiddleBytes);
    String *highMiddleStr = makeFromMUtf8(highMiddleBytes);
    // Independent copies used for the equality checks.
    String *plainTwin = makeFromMUtf8(plainBytes);
    String *shortTailTwin = makeFromMUtf8(shortTailBytes);

    // utf8 vs utf8
    ASSERT_EQ(plainStr->Compare(plainTwin), 0);
    ASSERT_EQ(plainTwin->Compare(plainStr), 0);
    ASSERT_EQ(shortTailStr->Compare(shortTailTwin), 0);
    ASSERT_EQ(shortTailTwin->Compare(shortTailStr), 0);
    // Difference late in the middle section: 'a' < 'y'.
    ASSERT_LT(lowMiddleStr->Compare(highMiddleStr), 0);
    ASSERT_GT(highMiddleStr->Compare(lowMiddleStr), 0);
    // Difference right after the first 'x': 'x' < 'z', even though lowMiddleStr is longer.
    ASSERT_LT(lowMiddleStr->Compare(shortTailStr), 0);
    ASSERT_GT(shortTailStr->Compare(lowMiddleStr), 0);
}
720 
// Exercises String::Compare on longer strings created with CreateFromUtf16. Every string
// starts with the same run of 16 code units 0xab, so differences only appear after that run.
TEST_F(StringTest, CompareTestLongUtf16)
{
    static constexpr size_t RUN_LENGTH = 16U;
    static constexpr uint16_t FILL_UNIT = 0xab;
    // Builds: RUN_LENGTH x FILL_UNIT, then `middle`, then `tailRun` x FILL_UNIT, then a terminating 0.
    auto buildUnits = [](const std::vector<uint16_t> &middle, size_t tailRun) {
        std::vector<uint16_t> units(RUN_LENGTH, FILL_UNIT);
        units.insert(units.end(), middle.begin(), middle.end());
        units.insert(units.end(), tailRun, FILL_UNIT);
        units.push_back(0);
        return units;
    };

    const std::vector<uint16_t> plainUnits = buildUnits({}, 0);
    const std::vector<uint16_t> shortTailUnits = buildUnits({'a', 0xbb}, 0);
    const std::vector<uint16_t> lowMiddleUnits = buildUnits({'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd}, RUN_LENGTH);
    const std::vector<uint16_t> highMiddleUnits = buildUnits({'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd}, RUN_LENGTH);

    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The terminating 0 is excluded from the length handed to the factory.
    auto makeFromUtf16 = [&ctx, vm](const std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, ctx, vm);
    };

    String *plainStr = makeFromUtf16(plainUnits);
    String *shortTailStr = makeFromUtf16(shortTailUnits);
    String *lowMiddleStr = makeFromUtf16(lowMiddleUnits);
    String *highMiddleStr = makeFromUtf16(highMiddleUnits);
    // Independent copies used for the equality checks.
    String *plainTwin = makeFromUtf16(plainUnits);
    String *shortTailTwin = makeFromUtf16(shortTailUnits);

    // utf16 vs utf16
    ASSERT_EQ(plainStr->Compare(plainTwin), 0);
    ASSERT_EQ(plainTwin->Compare(plainStr), 0);
    ASSERT_EQ(shortTailStr->Compare(shortTailTwin), 0);
    ASSERT_EQ(shortTailTwin->Compare(shortTailStr), 0);
    // Difference at the third unit of the middle section: 0xcc < 0xdd.
    ASSERT_LT(lowMiddleStr->Compare(highMiddleStr), 0);
    ASSERT_GT(highMiddleStr->Compare(lowMiddleStr), 0);
    // Difference at the second unit of the middle section: 'a' < 0xbb, even though lowMiddleStr is longer.
    ASSERT_LT(lowMiddleStr->Compare(shortTailStr), 0);
    ASSERT_GT(shortTailStr->Compare(lowMiddleStr), 0);
}
768 
// Exercises String::Concat for three operand combinations (MUtf8 + MUtf8, MUtf8 + UTF-16,
// UTF-16 + UTF-16) by comparing each result with a string created directly from the
// expected content.
TEST_F(StringTest, ConcatTest)
{
    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8 + utf8
    const std::vector<uint8_t> headBytes {'f', 'g', 'h', 0};
    const std::vector<uint8_t> tailBytes {'a', 'b', 'c', 'd', 'e', 0};
    // Expected content: headBytes without its terminator, followed by tailBytes with its terminator.
    std::vector<uint8_t> joinedBytes(headBytes.begin(), headBytes.end() - 1);
    joinedBytes.insert(joinedBytes.end(), tailBytes.begin(), tailBytes.end());

    String *headStr = String::CreateFromMUtf8(headBytes.data(), headBytes.size() - 1, ctx, vm);
    String *tailStr = String::CreateFromMUtf8(tailBytes.data(), tailBytes.size() - 1, ctx, vm);
    String *expectedNarrow = String::CreateFromMUtf8(joinedBytes.data(), joinedBytes.size() - 1, ctx, vm);
    ASSERT_FALSE(headStr->IsUtf16());
    ASSERT_FALSE(tailStr->IsUtf16());
    String *actualNarrow = String::Concat(headStr, tailStr, ctx, vm);
    ASSERT_EQ(expectedNarrow->Compare(actualNarrow), 0);
    ASSERT_EQ(actualNarrow->Compare(expectedNarrow), 0);

    // utf8 + utf16
    const std::vector<uint16_t> wideUnits {'a', 'b', 0xab, 0xdc, 'z', 0};
    // headBytes content followed by wideUnits content
    const std::vector<uint16_t> mixedUnits {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wideStr = String::CreateFromUtf16(wideUnits.data(), wideUnits.size() - 1, ctx, vm);
    String *expectedMixed = String::CreateFromUtf16(mixedUnits.data(), mixedUnits.size() - 1, ctx, vm);
    String *actualMixed = String::Concat(headStr, wideStr, ctx, vm);
    ASSERT_EQ(expectedMixed->GetLength(), actualMixed->GetLength());
    ASSERT_EQ(expectedMixed->Compare(actualMixed), 0);
    ASSERT_EQ(actualMixed->Compare(expectedMixed), 0);

    // utf16 + utf16
    // Expected content: wideUnits without its terminator, followed by mixedUnits with its terminator.
    std::vector<uint16_t> doubleWideUnits(wideUnits.begin(), wideUnits.end() - 1);
    doubleWideUnits.insert(doubleWideUnits.end(), mixedUnits.begin(), mixedUnits.end());
    String *expectedWide = String::CreateFromUtf16(doubleWideUnits.data(), doubleWideUnits.size() - 1, ctx, vm);
    String *actualWide = String::Concat(wideStr, expectedMixed, ctx, vm);
    ASSERT_EQ(expectedWide->Compare(actualWide), 0);
    ASSERT_EQ(actualWide->Compare(expectedWide), 0);
}
812 
// Checks String::DoReplace: replacing 'Z' with 'A' in "ZBCDEFGHIJ" must produce a string
// equal to "ABCDEFGHIJ".
//
// The character buffers are owned by std::vector so they are released even when the fatal
// assertion below returns early from the test body.
TEST_F(StringTest, DoReplaceTest0)
{
    static constexpr uint32_t STRING_LENGTH = 10;
    // One extra element keeps the '\0' terminator required by the pointer-only factory overload.
    std::vector<char> fString(STRING_LENGTH + 1, '\0');
    std::vector<char> sString(STRING_LENGTH + 1, '\0');

    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        const auto letter = static_cast<char>('A' + i);
        fString[i] = letter;
        sString[i] = letter;
    }
    // The source string differs from the expected one only in its first character.
    fString[0] = 'Z';

    String *fStringS = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString.data()), GetLanguageContext(),
                                               Runtime::GetCurrent()->GetPandaVM());
    String *sStringS = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString.data()), GetLanguageContext(),
                                               Runtime::GetCurrent()->GetPandaVM());
    String *tStringS = String::DoReplace(fStringS, 'Z', 'A', GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(String::StringsAreEqual(tStringS, sStringS), true);
}
842 
// Checks String::FastSubString: taking 5 characters starting at index 1 of "ABCDEFGHIJ"
// must produce a string equal to "BCDEF".
//
// The character buffers are owned by std::vector so they are released even when the fatal
// assertion below returns early from the test body.
TEST_F(StringTest, FastSubstringTest0)
{
    uint32_t stringLength = 10;
    // One extra element keeps the '\0' terminator required by the pointer-only factory overload.
    std::vector<char> fString(stringLength + 1, '\0');
    for (uint32_t i = 0; i < stringLength; i++) {
        fString[i] = static_cast<char>('A' + i);
    }

    uint32_t subStringLength = 5;
    uint32_t subStringStart = 1;
    // Expected substring, copied by hand from the source buffer and '\0'-terminated.
    std::vector<char> sString(subStringLength + 1, '\0');
    for (uint32_t j = 0; j < subStringLength; j++) {
        sString[j] = fString[subStringStart + j];
    }

    String *fStringS = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString.data()), GetLanguageContext(),
                                               Runtime::GetCurrent()->GetPandaVM());
    String *sStringS = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString.data()), GetLanguageContext(),
                                               Runtime::GetCurrent()->GetPandaVM());
    String *tStringS = String::FastSubString(fStringS, subStringStart, subStringLength, GetLanguageContext(),
                                             Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(String::StringsAreEqual(tStringS, sStringS), true);
}
875 
// Checks String::ToCharArray for a string created from MUtf8 data and for one created from
// UTF-16 data: the returned array must hold exactly the source characters as uint16_t values.
//
// The array length is asserted before the element loop. Without that check an empty or too
// short array would pass without comparing anything, and a too long one would index past
// the end of the expected data.
TEST_F(StringTest, ToCharArray)
{
    // utf8
    std::vector<uint8_t> data {'a', 'b', 'c', 'd', 'e', 0};
    String *utf8String = String::CreateFromMUtf8(data.data(), data.size() - 1, GetLanguageContext(),
                                                 Runtime::GetCurrent()->GetPandaVM());
    Array *newArray = utf8String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray, nullptr);
    // The trailing 0 of `data` is not part of the string.
    ASSERT_EQ(data.size() - 1, newArray->GetLength());
    for (uint32_t i = 0; i < newArray->GetLength(); ++i) {
        ASSERT_EQ(data[i], newArray->Get<uint16_t>(i));
    }

    // utf16
    std::vector<uint16_t> data1 {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *utf16String = String::CreateFromUtf16(data1.data(), data1.size() - 1, GetLanguageContext(),
                                                  Runtime::GetCurrent()->GetPandaVM());
    Array *newArray1 = utf16String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray1, nullptr);
    ASSERT_EQ(data1.size() - 1, newArray1->GetLength());
    for (uint32_t i = 0; i < newArray1->GetLength(); ++i) {
        ASSERT_EQ(data1[i], newArray1->Get<uint16_t>(i));
    }
}
895 
// Checks String::CreateNewStringFromChars: a string built from a slice (offset 1, length 5)
// of a char array must equal a string created directly from the same slice of the source data.
TEST_F(StringTest, CreateNewStingFromCharArray)
{
    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const std::vector<uint16_t> sourceUnits {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *sourceStr = String::CreateFromUtf16(sourceUnits.data(), sourceUnits.size() - 1, ctx, vm);
    Array *sourceChars = sourceStr->ToCharArray(ctx);

    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    // Expected slice content; value-initialization already leaves the last element as the 0 terminator.
    std::vector<uint16_t> sliceUnits(sliceLength + 1);
    for (uint32_t pos = 0; pos < sliceLength; ++pos) {
        sliceUnits[pos] = sourceUnits[sliceOffset + pos];
    }
    sliceUnits[sliceLength] = 0;
    String *expectedStr = String::CreateFromUtf16(sliceUnits.data(), sliceUnits.size() - 1, ctx, vm);

    String *actualStr = String::CreateNewStringFromChars(sliceOffset, sliceLength, sourceChars, ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actualStr, expectedStr));
}
918 
// Checks String::CreateNewStringFromBytes: a string built from a slice (offset 1, length 5)
// of a byte array with high byte 0 must equal a UTF-16 string whose code units are
// (highByte << 8) + byte for the same slice.
TEST_F(StringTest, CreateNewStingFromByteArray)
{
    auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const std::vector<uint8_t> sourceBytes {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    const uint32_t highByte = 0;

    // Expected code units; no terminator is appended because the exact length is passed below.
    std::vector<uint16_t> expectedUnits(sliceLength);
    for (uint32_t pos = 0; pos < sliceLength; ++pos) {
        expectedUnits[pos] = (highByte << 8U) + (sourceBytes[sliceOffset + pos] & 0xFFU);
    }
    String *expectedStr = String::CreateFromUtf16(expectedUnits.data(), sliceLength, ctx, vm);

    // Copy every source byte except the trailing 0 into a runtime i8 array.
    Class *byteArrayClass =
        Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ark::ClassRoot::ARRAY_I8);
    const auto payloadSize = sourceBytes.size() - 1;
    Array *byteArray = Array::Create(byteArrayClass, payloadSize);
    for (uint32_t pos = 0; pos < payloadSize; pos++) {
        byteArray->Set<uint8_t>(pos, sourceBytes[pos]);
    }

    String *actualStr = String::CreateNewStringFromBytes(sliceOffset, sliceLength, highByte, byteArray, ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actualStr, expectedStr));
}
947 
948 }  // namespace ark::coretypes::test
949 
950 // NOLINTEND(readability-magic-numbers)
951