• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <ctime>
17 
18 #include "gtest/gtest.h"
19 #include "libpandabase/utils/span.h"
20 #include "libpandabase/utils/utf.h"
21 #include "libpandabase/utils/utils.h"
22 #include "runtime/include/class_linker_extension.h"
23 #include "runtime/include/coretypes/array-inl.h"
24 #include "runtime/include/coretypes/string-inl.h"
25 #include "runtime/include/runtime.h"
26 #include "runtime/include/thread.h"
27 
28 // NOLINTBEGIN(readability-magic-numbers)
29 
30 namespace ark::coretypes::test {
31 
32 class StringTest : public testing::Test {
33 public:
StringTest()34     StringTest()
35     {
36 #ifdef PANDA_NIGHTLY_TEST_ON
37         seed_ = std::time(NULL);
38 #else
39         seed_ = 0xDEADBEEF;
40 #endif
41         srand(seed_);
42         // We need to create a runtime instance to be able to create strings.
43         options_.SetShouldLoadBootPandaFiles(false);
44         options_.SetShouldInitializeIntrinsics(false);
45         Runtime::Create(options_);
46     }
47 
~StringTest()48     ~StringTest() override
49     {
50         Runtime::Destroy();
51     }
52 
53     NO_COPY_SEMANTIC(StringTest);
54     NO_MOVE_SEMANTIC(StringTest);
55 
GetLanguageContext()56     LanguageContext GetLanguageContext()
57     {
58         return Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
59     }
60 
SetUp()61     void SetUp() override
62     {
63         thread_ = ark::MTManagedThread::GetCurrent();
64         thread_->ManagedCodeBegin();
65     }
66 
TearDown()67     void TearDown() override
68     {
69         thread_->ManagedCodeEnd();
70     }
71 
72 protected:
73     static constexpr uint32_t SIMPLE_UTF8_STRING_LENGTH = 13;
74     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
75     static constexpr char SIMPLE_UTF8_STRING[SIMPLE_UTF8_STRING_LENGTH + 1] = "Hello, world!";
76 
77 private:
78     ark::MTManagedThread *thread_ {};
79     unsigned seed_ {};
80     RuntimeOptions options_;
81 };
82 
// Builds a string from a zero-terminated MUTF-8 buffer of three one-byte values and checks
// that StringsAreEqualMUtf8 reports it equal to the very same buffer.
TEST_F(StringTest, EqualStringWithCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0x01, 0x05, 0x07, 0x00};
    const uint32_t charCount = rawBytes.size() - 1;  // the trailing zero is not counted
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
91 
// Same round-trip equality check, but the buffer starts with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, EqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    // Append twenty consecutive one-byte values starting at 0x30, then the terminator.
    for (size_t offset = 0; offset < 20U; ++offset) {
        rawBytes.push_back(0x30 + offset);
    }
    rawBytes.push_back(0);

    // Two bytes fewer than the buffer size are passed as the length.
    const uint32_t charCount = rawBytes.size() - 2U;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, rawBytes.data(), charCount));
}
106 
// Two buffers that differ only in their third byte (0x33 vs 0x34) must not compare equal.
TEST_F(StringTest, NotEqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> source {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> other {0xc2, 0xa7, 0x34, 0x00};
    const uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(source.data(), charCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, other.data(), charCount));
}
116 
// A string created from a buffer with a two-byte sequence is compared against a buffer made of
// one-byte values only; the lengths passed differ (2 vs 3) and the result must be "not equal".
TEST_F(StringTest, NotEqualStringNotCompressedStringWithCompressedRawData)
{
    std::vector<uint8_t> source {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> other {0x02, 0x07, 0x04, 0x00};
    const uint32_t sourceLength = 2;
    const uint32_t otherLength = 3;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(source.data(), sourceLength, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, other.data(), otherLength));
}
127 
// Mirror of the previous case: the string comes from the one-byte-only buffer (length 3) and is
// compared against the buffer containing a two-byte sequence (length 2).
TEST_F(StringTest, NotEqualCompressedStringWithUncompressedRawUtf8Data)
{
    std::vector<uint8_t> source {0x02, 0x07, 0x04, 0x00};
    std::vector<uint8_t> other {0xc2, 0xa7, 0x33, 0x00};
    const uint32_t sourceLength = 3;
    const uint32_t otherLength = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(source.data(), sourceLength, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, other.data(), otherLength));
}
138 
// The comparison buffer is a strict prefix of the source and is passed with a length one less
// than the string's; StringsAreEqualMUtf8 must return false.
TEST_F(StringTest, EqualStringWithMUtf8DifferentLength)
{
    std::vector<uint8_t> source {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> shorter {0xc2, 0xa7, 0x00};
    const uint32_t charCount = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromMUtf8(source.data(), charCount, GetLanguageContext(), vm);
    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, shorter.data(), charCount - 1));
}
148 
// Creates a string from a UTF-16 buffer (all three code units, including the final 0x00) and
// checks StringsAreEqualUtf16 against that same buffer.
TEST_F(StringTest, EqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    // The vector already stores uint16_t, so a plain pointer-to-const is enough here.
    const uint16_t *rawUtf16 = codeUnits.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUtf16, codeUnits.size()));
}
157 
// Fills a UTF-16 buffer with the values 1..30 plus a terminator, creates a string from the
// first 30 code units and compares it with the raw buffer over the same length.
TEST_F(StringTest, CompareCompressedStringWithRawUtf16)
{
    std::vector<uint16_t> codeUnits;
    for (size_t idx = 0; idx < 30U; ++idx) {
        codeUnits.push_back(idx + 1);
    }
    codeUnits.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size() - 1, GetLanguageContext(), vm);
    const uint16_t *rawUtf16 = codeUnits.data();
    ASSERT_TRUE(String::StringsAreEqualUtf16(created, rawUtf16, codeUnits.size() - 1));
}
172 
// The comparison buffer holds one extra code unit (0x55) and is passed with its own, larger
// size; StringsAreEqualUtf16 must return false.
TEST_F(StringTest, EqualStringWithRawUtf16DifferentLength)
{
    std::vector<uint16_t> source {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> longer {0xffc3, 0x33, 0x55, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(source.data(), source.size(), GetLanguageContext(), vm);
    const uint16_t *rawUtf16 = longer.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUtf16, longer.size()));
}
182 
// Buffers of equal size that differ in the second code unit (0x33 vs 0x34) must not be equal.
TEST_F(StringTest, NotEqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> source {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> other {0xffc3, 0x34, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(source.data(), source.size(), GetLanguageContext(), vm);

    const uint16_t *rawUtf16 = other.data();
    ASSERT_FALSE(String::StringsAreEqualUtf16(created, rawUtf16, other.size()));
}
193 
// The hash stored/computed by a String built from SIMPLE_UTF8_STRING must match
// ComputeHashcodeMutf8 run directly over the same bytes and length.
TEST_F(StringTest, compressedHashCodeUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeMutf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH);

    ASSERT_EQ(hashFromString, hashFromRaw);
}
// Hash consistency check for a buffer that begins with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, notCompressedHashCodeUtf8)
{
    constexpr size_t TAIL_COUNT = 20U;
    std::vector<uint8_t> rawBytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < TAIL_COUNT; ++offset) {
        rawBytes.push_back(0x30 + offset);
    }
    rawBytes.push_back(0);

    // The leading two-byte sequence is counted once, every appended byte once more.
    const size_t charCount = 1 + TAIL_COUNT;

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes.data(), charCount, GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeMutf8(rawBytes.data(), charCount);

    ASSERT_EQ(hashFromString, hashFromRaw);
}
223 
// Hash consistency check for a UTF-16 buffer holding 1..30 followed by a zero code unit.
// NOTE(review): the full buffer size, trailing zero included, is used as the length here,
// unlike CompareCompressedStringWithRawUtf16 which passes size() - 1 - confirm this is intended.
TEST_F(StringTest, compressedHashCodeUtf16)
{
    const size_t valueCount = 30;
    std::vector<uint16_t> codeUnits;
    for (size_t idx = 0; idx < valueCount; ++idx) {
        codeUnits.push_back(idx + 1);
    }
    codeUnits.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeUtf16(codeUnits.data(), codeUnits.size());
    ASSERT_EQ(hashFromString, hashFromRaw);
}
240 
// Hash consistency check for a UTF-16 buffer whose first code unit is 0xffc3.
TEST_F(StringTest, notCompressedHashCodeUtf16)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    auto hashFromString = created->GetHashcode();
    auto hashFromRaw = String::ComputeHashcodeUtf16(codeUnits.data(), codeUnits.size());
    ASSERT_EQ(hashFromString, hashFromRaw);
}
250 
// GetLength() of a string created from SIMPLE_UTF8_STRING equals the length it was created with.
TEST_F(StringTest, lengthUtf8)
{
    const auto *rawBytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(rawBytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), SIMPLE_UTF8_STRING_LENGTH);
}
258 
// GetLength() of a string created from a UTF-16 buffer equals the number of code units passed in.
TEST_F(StringTest, lengthUtf16)
{
    std::vector<uint16_t> codeUnits {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    auto *created = String::CreateFromUtf16(codeUnits.data(), codeUnits.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetLength(), codeUnits.size());
}
266 
// "Hello, world!" and its 8-character prefix "Hello, w" keep their own lengths and are not equal.
TEST_F(StringTest, DifferentLengthStringCompareTest)
{
    static constexpr uint32_t PREFIX_LENGTH = 8;
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr char PREFIX[PREFIX_LENGTH + 1] = "Hello, w";

    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *fullString = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING),
                                                 SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(fullString->GetLength(), SIMPLE_UTF8_STRING_LENGTH);

    String *prefixString =
        String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(PREFIX), PREFIX_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(prefixString->GetLength(), PREFIX_LENGTH);

    ASSERT_FALSE(String::StringsAreEqual(fullString, prefixString));
}
281 
// Length and copy-out checks for a buffer that ends with 0xc0 0x80 (the encoding of U+0000)
// before the terminating zero.
TEST_F(StringTest, ForeignLengthAndCopyTest1b0)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0xc0, 0x80, 0x00};
    const uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // c080 is U+0000
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 2U);  // \0 doesn't counts for UTF16

    // Copying back as MUTF-8 must reproduce the input bytes.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copying as UTF-16 yields one code unit per character, the last one being 0.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x00};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
298 
// Length and copy-out checks for a buffer made only of one-byte values (up to 0x7f).
TEST_F(StringTest, ForeignLengthAndCopyTest1b)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0x7f, 0x00};
    const uint32_t byteCount = mutf8.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), byteCount - 1, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), mutf8.size() - 1);  // \0 doesn't counts for UTF16

    // Copying back as MUTF-8 must reproduce the input bytes.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // Copying as UTF-16 widens every byte to one code unit.
    std::vector<uint16_t> expected16 {'a', 'b', 'c', 'd', 'z', 0x7f};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
315 
// Length and copy-out checks for a buffer starting with the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, ForeignLengthAndCopyTest2b)
{
    std::vector<uint8_t> mutf8 {0xc2, 0xa7, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // \0 doesn't counts for UTF16

    // Copying back as MUTF-8 must reproduce the input bytes.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // The two-byte sequence is expected to come back as the single code unit 0xa7.
    std::vector<uint16_t> expected16 {0xa7, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
331 
// Length and copy-out checks for a buffer starting with the three-byte sequence 0xef 0xbf 0x83.
TEST_F(StringTest, ForeignLengthAndCopyTest3b)
{
    std::vector<uint8_t> mutf8 {0xef, 0xbf, 0x83, 0x33, 0x00};  // UTF-16 size is 2
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 2U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), mutf8.size());
    ASSERT_EQ(created->GetUtf16Length(), 2U);  // \0 doesn't counts for UTF16

    // Copying back as MUTF-8 must reproduce the input bytes.
    std::vector<uint8_t> copied8(mutf8.size());
    ASSERT_EQ(created->CopyDataMUtf8(copied8.data(), copied8.size(), true), mutf8.size());
    ASSERT_EQ(copied8, mutf8);

    // The three-byte sequence is expected to come back as the single code unit 0xffc3.
    std::vector<uint16_t> expected16 {0xffc3, 0x33};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
347 
// Length and copy-out checks for a surrogate pair that is supplied as a six-byte sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest6b)
{
    std::vector<uint8_t> mutf8 {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x20, 0x00};  // UTF-16 size is 3
    // We support 4-byte utf-8 sequences, so {0xd801, 0xdc37} is encoded to 4 bytes instead of 6
    std::vector<uint8_t> expected8 {0xf0, 0x90, 0x90, 0xb7, 0x20, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), 3U, GetLanguageContext(), vm);
    ASSERT_EQ(created->GetMUtf8Length(), expected8.size());
    ASSERT_EQ(created->GetUtf16Length(), 3U);  // \0 doesn't counts for UTF16

    // Only the copied bytes are verified here; the return value of the copy is not inspected.
    std::vector<uint8_t> copied8(expected8.size());
    created->CopyDataMUtf8(copied8.data(), copied8.size(), true);
    ASSERT_EQ(copied8, expected8);

    std::vector<uint16_t> expected16 {0xd801, 0xdc37, 0x20};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataUtf16(copied16.data(), copied16.size()), expected16.size());
    ASSERT_EQ(copied16, expected16);
}
365 
// Copies the region "cd" (start index 2) out of "abcdz" through both the MUTF-8 and the UTF-16
// region-copy APIs.
TEST_F(StringTest, RegionCopyTestMutf8)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0x00};
    const uint32_t charCount = mutf8.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), charCount, GetLanguageContext(), vm);
    const size_t regionStart = 2;

    // MUTF-8 region: the requested length is GetMUtf8Length() minus the start offset minus two.
    const size_t mutf8Length = created->GetMUtf8Length();
    std::vector<uint8_t> expected8 = {'c', 'd', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, mutf8Length - regionStart - 2, copied8.size()),
              copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // UTF-16 region: the requested length is GetUtf16Length() minus the start offset minus one.
    const size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> expected16 = {'c', 'd'};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, utf16Length - regionStart - 1, copied16.size()),
              copied16.size());
    ASSERT_EQ(copied16, expected16);
}
385 
// Extracts "hello" (5 characters starting at index 2) from a string created via CreateFromUtf8,
// using both CopyDataRegionUtf8 and CopyDataRegionUtf16.
TEST_F(StringTest, RegionCopyTestUtf8)
{
    std::vector<uint8_t> utf8 {'a', 'b', 'h', 'e', 'l', 'l', 'o', 'c', 'd', 'z', 0};
    const size_t regionStart = 2;
    const size_t regionLength = 5;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromUtf8(utf8.data(), utf8.size(), GetLanguageContext(), vm);

    // The destination is zero-filled and one byte larger than the region, so the untouched last
    // byte matches the terminator in the expected vector.
    std::vector<uint8_t> expected8 {'h', 'e', 'l', 'l', 'o', 0};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionUtf8(copied8.data(), regionStart, regionLength, copied8.size() - 1),
              expected8.size() - 1);
    ASSERT_EQ(copied8, expected8);

    std::vector<uint16_t> expected16 {'h', 'e', 'l', 'l', 'o'};
    std::vector<uint16_t> copied16(expected16.size());
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, regionLength, copied16.size()),
              expected16.size());
    ASSERT_EQ(copied16, expected16);
}
405 
// Region copy of "cdz" (3 characters from index 2) out of a string whose source buffer also
// contains the two-byte sequence 0xc2 0xa7.
TEST_F(StringTest, RegionCopyTestUtf16)
{
    std::vector<uint8_t> mutf8 {'a', 'b', 'c', 'd', 'z', 0xc2, 0xa7, 0x00};
    const uint32_t charCount = mutf8.size() - 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(mutf8.data(), charCount, GetLanguageContext(), vm);
    const size_t regionStart = 2;

    std::vector<uint8_t> expected8 = {'c', 'd', 'z', 0x00};
    std::vector<uint8_t> copied8(expected8.size());
    ASSERT_EQ(created->CopyDataRegionMUtf8(copied8.data(), regionStart, 3U, copied8.size()), copied8.size() - 1);
    copied8[copied8.size() - 1] = '\0';
    ASSERT_EQ(copied8, expected8);

    // The UTF-16 destination is sized from the string's own UTF-16 length.
    const size_t utf16Length = created->GetUtf16Length();
    std::vector<uint16_t> copied16(utf16Length - regionStart - 1);
    std::vector<uint16_t> expected16 = {'c', 'd', 'z'};
    ASSERT_EQ(created->CopyDataRegionUtf16(copied16.data(), regionStart, 3U, copied16.size()), copied16.size());
    ASSERT_EQ(copied16, expected16);
}
424 
// GetUtf8Length() of a string created via CreateFromUtf8 equals the byte count it was given
// (the trailing zero is part of the data passed in).
TEST_F(StringTest, GetUtf8Length)
{
    std::vector<uint8_t> utf8 = {'H', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd', '!', 0};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromUtf8(utf8.data(), utf8.size(), GetLanguageContext(), vm);
    ASSERT_EQ(created->GetUtf8Length(), utf8.size());
}
432 
// Reads the bytes back through GetDataUtf8(), builds a second string from them and expects the
// two strings to be equal and both to report IsUtf16() == false.
TEST_F(StringTest, GetDataUtf8)
{
    std::vector<uint8_t> sample = {'e', 'x', 'a', 'm', 'p', 'l', 'e'};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *original = String::CreateFromUtf8(sample.data(), sample.size(), GetLanguageContext(), vm);
    ASSERT_FALSE(original->IsUtf16());

    // Snapshot the string's internal bytes into a separate buffer.
    std::vector<uint8_t> readBack(original->GetDataUtf8(), original->GetDataUtf8() + sample.size());  // NOLINT

    String *rebuilt = String::CreateFromUtf8(readBack.data(), readBack.size(), GetLanguageContext(), vm);
    ASSERT_FALSE(rebuilt->IsUtf16());
    ASSERT_TRUE(String::StringsAreEqual(original, rebuilt));
}
446 
TEST_F(StringTest,SameLengthStringCompareTest)447 TEST_F(StringTest, SameLengthStringCompareTest)
448 {
449     static constexpr uint32_t STRING_LENGTH = 10;
450     char *fString = new char[STRING_LENGTH + 1];
451     char *sString = new char[STRING_LENGTH + 1];
452 
453     for (uint32_t i = 0; i < STRING_LENGTH; i++) {
454         // Hack for ConvertMUtf8ToUtf16 call.
455         // We should use char from 0x7f to 0x0 if we want to
456         // generate one utf16 (0x00xx) from this mutf8.
457         // NOLINTNEXTLINE(cert-msc50-cpp)
458         uint8_t val1 = rand();
459         val1 = val1 >> 1U;
460         if (val1 == 0) {
461             val1++;
462         }
463 
464         // NOLINTNEXTLINE(cert-msc50-cpp)
465         uint8_t val2 = rand();
466         val2 = val2 >> 1U;
467         if (val2 == 0) {
468             val2++;
469         }
470 
471         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
472         fString[i] = val1;
473         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
474         sString[i] = val2;
475     }
476     // Set the last elements in strings with size more than 0x8 to disable compressing.
477     // This will leads to count two MUtf-8 bytes as one UTF-16 so length = string_length - 1
478     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
479     fString[STRING_LENGTH - 2U] = uint8_t(0x80);
480     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
481     sString[STRING_LENGTH - 2U] = uint8_t(0x80);
482     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
483     fString[STRING_LENGTH - 1] = uint8_t(0x01);
484     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
485     sString[STRING_LENGTH - 1] = uint8_t(0x01);
486     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
487     fString[STRING_LENGTH] = '\0';
488     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
489     sString[STRING_LENGTH] = '\0';
490 
491     String *firstUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(fString), STRING_LENGTH - 1,
492                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
493     // Try to use function with automatic length detection
494     String *secondUtf16String = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(sString),
495                                                         GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
496     ASSERT_EQ(firstUtf16String->GetLength(), STRING_LENGTH - 1);
497     ASSERT_EQ(secondUtf16String->GetLength(), STRING_LENGTH - 1);
498 
499     // Dirty hack to not create utf16 for our purpose, just reuse old one
500     // Try to create compressed strings.
501     String *firstUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
502                                                       GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
503     String *secondUtf8String = String::CreateFromUtf16(firstUtf16String->GetDataUtf16(), STRING_LENGTH - 1,
504                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
505     ASSERT_EQ(firstUtf8String->GetLength(), STRING_LENGTH - 1);
506     ASSERT_EQ(secondUtf8String->GetLength(), STRING_LENGTH - 1);
507 
508     ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf16String), strcmp(fString, sString) == 0);
509     ASSERT_EQ(String::StringsAreEqual(firstUtf16String, secondUtf8String),
510               firstUtf16String->IsUtf16() == secondUtf8String->IsUtf16());
511     ASSERT_EQ(String::StringsAreEqual(firstUtf8String, secondUtf8String), true);
512     ASSERT_TRUE(firstUtf16String->IsUtf16());
513     ASSERT_TRUE(String::StringsAreEqualUtf16(firstUtf16String, firstUtf16String->GetDataUtf16(),
514                                              firstUtf16String->GetLength()));
515 
516     delete[] fString;
517     delete[] sString;
518 }
519 
// ObjectSize() must agree with ComputeSizeMUtf8 for the first buffer and with ComputeSizeUtf16
// for the second one (which contains 0x80 bytes).
TEST_F(StringTest, ObjectSize)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    {
        std::vector<uint8_t> digits {'1', '2', '3', '4', '5', 0x00};
        const uint32_t charCount = digits.size() - 1;
        String *created = String::CreateFromMUtf8(digits.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeMUtf8(charCount));
    }

    {
        std::vector<uint8_t> wideBytes {0x80, 0x01, 0x80, 0x02, 0x00};
        // Half of the buffer size (integer division) is passed as the length.
        const uint32_t charCount = wideBytes.size() / 2U;
        String *created = String::CreateFromMUtf8(wideBytes.data(), charCount, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeUtf16(charCount));
    }
}
538 
// At(i) must return the i-th source element for three creation paths: MUTF-8 input, UTF-16
// input that stays UTF-16, and UTF-16 input that ends up with IsUtf16() == false.
TEST_F(StringTest, AtTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // utf8
    std::vector<uint8_t> narrowData {'a', 'b', 'c', 'd', 'z', 0};
    String *narrowString =
        String::CreateFromMUtf8(narrowData.data(), narrowData.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, narrowString->IsUtf16());
    for (uint32_t pos = 0; pos < narrowData.size() - 1; ++pos) {
        ASSERT_EQ(narrowData[pos], narrowString->At(pos));
    }

    // utf16
    std::vector<uint16_t> wideData {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wideString = String::CreateFromUtf16(wideData.data(), wideData.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(true, wideString->IsUtf16());
    for (uint32_t pos = 0; pos < wideData.size() - 1; ++pos) {
        ASSERT_EQ(wideData[pos], wideString->At(pos));
    }

    // utf16 -> utf8
    std::vector<uint16_t> asciiWideData {'a', 'b', 121, 122, 'z', 0};
    String *packedString =
        String::CreateFromUtf16(asciiWideData.data(), asciiWideData.size() - 1, GetLanguageContext(), vm);
    ASSERT_EQ(false, packedString->IsUtf16());
    for (uint32_t pos = 0; pos < asciiWideData.size() - 1; ++pos) {
        ASSERT_EQ(asciiWideData[pos], packedString->At(pos));
    }
}
568 
// IndexOf must give the same answer regardless of whether the haystack and the needle were
// created from MUTF-8 or from UTF-16 input. Only cross-consistency is asserted here; the
// MUTF-8/MUTF-8 result is additionally printed to stdout.
TEST_F(StringTest, IndexOfTest)
{
    std::vector<uint8_t> haystack8 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> needle8 {'b', 'c', 'd', 0};
    std::vector<uint16_t> haystack16 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> needle16 {'b', 'c', 'd', 0};

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *hay8 = String::CreateFromMUtf8(haystack8.data(), haystack8.size() - 1, GetLanguageContext(), vm);
    String *pat8 = String::CreateFromMUtf8(needle8.data(), needle8.size() - 1, GetLanguageContext(), vm);
    String *hay16 = String::CreateFromUtf16(haystack16.data(), haystack16.size() - 1, GetLanguageContext(), vm);
    String *pat16 = String::CreateFromUtf16(needle16.data(), needle16.size() - 1, GetLanguageContext(), vm);

    // Search starting at position 1.
    auto narrowInNarrow = hay8->IndexOf(pat8, 1);
    auto wideInNarrow = hay8->IndexOf(pat16, 1);
    auto narrowInWide = hay16->IndexOf(pat8, 1);
    auto wideInWide = hay16->IndexOf(pat16, 1);
    std::cout << narrowInNarrow << std::endl;
    ASSERT_EQ(narrowInNarrow, narrowInWide);
    ASSERT_EQ(wideInNarrow, wideInWide);

    // Search starting at position 2.
    narrowInNarrow = hay8->IndexOf(pat8, 2_I);
    wideInNarrow = hay8->IndexOf(pat16, 2_I);
    narrowInWide = hay16->IndexOf(pat8, 2_I);
    wideInWide = hay16->IndexOf(pat16, 2_I);
    std::cout << narrowInNarrow << std::endl;
    ASSERT_EQ(narrowInNarrow, narrowInWide);
    ASSERT_EQ(wideInNarrow, wideInWide);
}
599 
// Pins concrete IndexOf results for repeated patterns, out-of-range start positions and the
// empty string as either haystack or needle.
TEST_F(StringTest, IndexOfTest2)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Builds a String from a zero-terminated byte vector, excluding the terminator.
    auto makeString = [this, vm](std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(), vm);
    };

    {
        // Pattern "aba" occurs at offsets 0 and 4 of "abacaba".
        std::vector<uint8_t> haystackBytes {'a', 'b', 'a', 'c', 'a', 'b', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'b', 'a', 0};
        String *haystack = makeString(haystackBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(0, haystack->IndexOf(needle, -1));
        ASSERT_EQ(0, haystack->IndexOf(needle, 0));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 1));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 5_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 6_I));

        // Empty-string cases: as haystack nothing is found; as needle the expected result is the
        // start position limited to the range [0, 7].
        String *empty = String::CreateEmptyString(GetLanguageContext(), vm);
        ASSERT_EQ(-1, empty->IndexOf(haystack, 0));
        ASSERT_EQ(0, haystack->IndexOf(empty, -3_I));
        ASSERT_EQ(2_I, haystack->IndexOf(empty, 2_I));
        ASSERT_EQ(7_I, haystack->IndexOf(empty, 10_I));
    }
    {
        // Single occurrence in the middle.
        std::vector<uint8_t> haystackBytes {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0};
        std::vector<uint8_t> needleBytes {'d', 'e', 'f', 0};
        String *haystack = makeString(haystackBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(3_I, haystack->IndexOf(needle, 0));
    }
    {
        // Overlapping occurrences of "aaa" at offsets 2, 3 and 4.
        std::vector<uint8_t> haystackBytes {'a', 'b', 'a', 'a', 'a', 'a', 'a', 0};
        std::vector<uint8_t> needleBytes {'a', 'a', 'a', 0};
        String *haystack = makeString(haystackBytes);
        String *needle = makeString(needleBytes);
        ASSERT_EQ(2_I, haystack->IndexOf(needle, 0));
        ASSERT_EQ(2_I, haystack->IndexOf(needle, 2_I));
        ASSERT_EQ(3_I, haystack->IndexOf(needle, 3_I));
        ASSERT_EQ(4_I, haystack->IndexOf(needle, 4_I));
        ASSERT_EQ(-1, haystack->IndexOf(needle, 5_I));
    }
}
645 
// Exercises String::Compare on strings for which IsUtf16() is asserted to be false, on those strings
// against one for which IsUtf16() is asserted to be true, and on every string against itself.
TEST_F(StringTest, CompareTestUtf8)
{
    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Every input vector ends with a zero terminator that is excluded from the length handed to the factory.
    auto fromMUtf8 = [&ctx, vm](const std::vector<uint8_t> &chars) {
        return String::CreateFromMUtf8(chars.data(), chars.size() - 1, ctx, vm);
    };
    auto fromUtf16 = [&ctx, vm](const std::vector<uint16_t> &chars) {
        return String::CreateFromUtf16(chars.data(), chars.size() - 1, ctx, vm);
    };

    // Inputs whose strings are expected to report IsUtf16() == false.
    const std::vector<uint8_t> shortBytes {'a', 'b', 'c', 'd', 'z', 0};
    const std::vector<uint8_t> longerBytes {'a', 'b', 'c', 'd', 'z', 'x', 0};
    const std::vector<uint16_t> sameAsShortWide {'a', 'b', 'c', 'd', 'z', 0};
    const std::vector<uint16_t> swappedWide {'a', 'b', 'd', 'c', 'z', 0};

    String *shortStr = fromMUtf8(shortBytes);
    String *longerStr = fromMUtf8(longerBytes);
    String *sameAsShortStr = fromUtf16(sameAsShortWide);
    String *swappedStr = fromUtf16(swappedWide);

    ASSERT_FALSE(shortStr->IsUtf16());
    ASSERT_FALSE(longerStr->IsUtf16());
    ASSERT_FALSE(sameAsShortStr->IsUtf16());
    ASSERT_FALSE(swappedStr->IsUtf16());

    // A proper prefix orders before the longer string.
    ASSERT_LT(shortStr->Compare(longerStr), 0);
    ASSERT_GT(longerStr->Compare(shortStr), 0);
    // Same characters compare equal regardless of which factory produced the string.
    ASSERT_EQ(shortStr->Compare(sameAsShortStr), 0);
    ASSERT_EQ(sameAsShortStr->Compare(shortStr), 0);
    // The first differing character ('c' vs 'd') decides the order.
    ASSERT_LT(longerStr->Compare(swappedStr), 0);
    ASSERT_GT(swappedStr->Compare(longerStr), 0);

    // A string that is expected to report IsUtf16() == true, compared against the ones above.
    const std::vector<uint16_t> wideChars {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wideStr = fromUtf16(wideChars);
    ASSERT_TRUE(wideStr->IsUtf16());
    ASSERT_LT(longerStr->Compare(wideStr), 0);
    ASSERT_GT(wideStr->Compare(longerStr), 0);
    ASSERT_LT(swappedStr->Compare(wideStr), 0);
    ASSERT_GT(wideStr->Compare(swappedStr), 0);

    // Comparing any string with itself yields zero.
    ASSERT_EQ(shortStr->Compare(shortStr), 0);
    ASSERT_EQ(longerStr->Compare(longerStr), 0);
    ASSERT_EQ(sameAsShortStr->Compare(sameAsShortStr), 0);
    ASSERT_EQ(swappedStr->Compare(swappedStr), 0);
    ASSERT_EQ(wideStr->Compare(wideStr), 0);
}
689 
// Exercises String::Compare between strings for which IsUtf16() is asserted to be true.
TEST_F(StringTest, CompareTestUtf16)
{
    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero terminator of each vector is not counted in the length handed to the factory.
    auto fromUtf16 = [&ctx, vm](const std::vector<uint16_t> &chars) {
        return String::CreateFromUtf16(chars.data(), chars.size() - 1, ctx, vm);
    };

    const std::vector<uint16_t> lowerChars {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *lowerStr = fromUtf16(lowerChars);
    const std::vector<uint16_t> higherChars {'a', 0xab, 0xab, 0};
    String *higherStr = fromUtf16(higherChars);
    // Second string built from the very same data as higherStr.
    String *higherTwin = fromUtf16(higherChars);

    ASSERT_TRUE(lowerStr->IsUtf16());
    ASSERT_TRUE(higherStr->IsUtf16());
    ASSERT_TRUE(higherTwin->IsUtf16());

    // The second character ('b' vs 0xab) decides the order.
    ASSERT_LT(lowerStr->Compare(higherStr), 0);
    ASSERT_GT(higherStr->Compare(lowerStr), 0);
    // Distinct objects with identical contents compare equal in both directions.
    ASSERT_EQ(higherStr->Compare(higherTwin), 0);
    ASSERT_EQ(higherTwin->Compare(higherStr), 0);

    // Comparing any string with itself yields zero.
    ASSERT_EQ(lowerStr->Compare(lowerStr), 0);
    ASSERT_EQ(higherStr->Compare(higherStr), 0);
    ASSERT_EQ(higherTwin->Compare(higherTwin), 0);
}
714 
// Exercises String::Compare on longer strings created with CreateFromMUtf8 that share a 16-character prefix.
TEST_F(StringTest, CompareTestLongUtf8)
{
    static constexpr size_t PREFIX_LENGTH = 16U;
    // Produces: PREFIX_LENGTH x 'a', then `infix`, then `suffixLength` x 'a', then a zero terminator.
    auto buildChars = [](const std::vector<uint8_t> &infix, size_t suffixLength) {
        std::vector<uint8_t> chars(PREFIX_LENGTH, 'a');
        chars.insert(chars.end(), infix.begin(), infix.end());
        chars.insert(chars.end(), suffixLength, 'a');
        chars.push_back(0);
        return chars;
    };

    const std::vector<uint8_t> prefixOnly = buildChars({}, 0);
    const std::vector<uint8_t> prefixXz = buildChars({'x', 'z'}, 0);
    const std::vector<uint8_t> longLower = buildChars({'x', 'x', 'x', 'y', 'y', 'a', 'a'}, PREFIX_LENGTH);
    const std::vector<uint8_t> longHigher = buildChars({'x', 'x', 'x', 'y', 'y', 'y', 'y'}, PREFIX_LENGTH);

    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero terminator is not counted in the length handed to the factory.
    auto fromMUtf8 = [&ctx, vm](const std::vector<uint8_t> &chars) {
        return String::CreateFromMUtf8(chars.data(), chars.size() - 1, ctx, vm);
    };

    String *prefixOnlyStr = fromMUtf8(prefixOnly);
    String *prefixXzStr = fromMUtf8(prefixXz);
    String *longLowerStr = fromMUtf8(longLower);
    String *longHigherStr = fromMUtf8(longHigher);
    // Twins: separate objects built from the same data as the first two strings.
    String *prefixOnlyTwin = fromMUtf8(prefixOnly);
    String *prefixXzTwin = fromMUtf8(prefixXz);

    // Identical contents compare equal in both directions.
    ASSERT_EQ(prefixOnlyStr->Compare(prefixOnlyTwin), 0);
    ASSERT_EQ(prefixOnlyTwin->Compare(prefixOnlyStr), 0);
    ASSERT_EQ(prefixXzStr->Compare(prefixXzTwin), 0);
    ASSERT_EQ(prefixXzTwin->Compare(prefixXzStr), 0);
    // Strings of equal length that differ a few characters after the common prefix ('a' vs 'y').
    ASSERT_LT(longLowerStr->Compare(longHigherStr), 0);
    ASSERT_GT(longHigherStr->Compare(longLowerStr), 0);
    // The longer string orders first here because it has 'x' where the shorter one has 'z'.
    ASSERT_LT(longLowerStr->Compare(prefixXzStr), 0);
    ASSERT_GT(prefixXzStr->Compare(longLowerStr), 0);
}
762 
// Exercises String::Compare on longer strings created with CreateFromUtf16 that share a 16-character prefix.
TEST_F(StringTest, CompareTestLongUtf16)
{
    static constexpr size_t PREFIX_LENGTH = 16U;
    static constexpr uint16_t FILLER = 0xab;
    // Produces: PREFIX_LENGTH x FILLER, then `infix`, then `suffixLength` x FILLER, then a zero terminator.
    auto buildChars = [](const std::vector<uint16_t> &infix, size_t suffixLength) {
        std::vector<uint16_t> chars(PREFIX_LENGTH, FILLER);
        chars.insert(chars.end(), infix.begin(), infix.end());
        chars.insert(chars.end(), suffixLength, FILLER);
        chars.push_back(0);
        return chars;
    };

    const std::vector<uint16_t> prefixOnly = buildChars({}, 0);
    const std::vector<uint16_t> prefixShort = buildChars({'a', 0xbb}, 0);
    const std::vector<uint16_t> longLower = buildChars({'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd}, PREFIX_LENGTH);
    const std::vector<uint16_t> longHigher = buildChars({'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd}, PREFIX_LENGTH);

    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // The trailing zero terminator is not counted in the length handed to the factory.
    auto fromUtf16 = [&ctx, vm](const std::vector<uint16_t> &chars) {
        return String::CreateFromUtf16(chars.data(), chars.size() - 1, ctx, vm);
    };

    String *prefixOnlyStr = fromUtf16(prefixOnly);
    String *prefixShortStr = fromUtf16(prefixShort);
    String *longLowerStr = fromUtf16(longLower);
    String *longHigherStr = fromUtf16(longHigher);
    // Twins: separate objects built from the same data as the first two strings.
    String *prefixOnlyTwin = fromUtf16(prefixOnly);
    String *prefixShortTwin = fromUtf16(prefixShort);

    // Identical contents compare equal in both directions.
    ASSERT_EQ(prefixOnlyStr->Compare(prefixOnlyTwin), 0);
    ASSERT_EQ(prefixOnlyTwin->Compare(prefixOnlyStr), 0);
    ASSERT_EQ(prefixShortStr->Compare(prefixShortTwin), 0);
    ASSERT_EQ(prefixShortTwin->Compare(prefixShortStr), 0);
    // Strings of equal length that differ shortly after the common prefix (0xcc vs 0xdd).
    ASSERT_LT(longLowerStr->Compare(longHigherStr), 0);
    ASSERT_GT(longHigherStr->Compare(longLowerStr), 0);
    // The longer string orders first here because it has 'a' where the shorter one has 0xbb.
    ASSERT_LT(longLowerStr->Compare(prefixShortStr), 0);
    ASSERT_GT(prefixShortStr->Compare(longLowerStr), 0);
}
810 
// Checks String::Concat by comparing its result against a string created directly from the joined data,
// for three operand combinations: MUtf8 + MUtf8, MUtf8 + Utf16 and Utf16 + Utf16 created strings.
TEST_F(StringTest, ConcatTest)
{
    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // --- utf8 + utf8 ---
    const std::vector<uint8_t> headBytes {'f', 'g', 'h', 0};
    const std::vector<uint8_t> tailBytes {'a', 'b', 'c', 'd', 'e', 0};
    // Joined data: head without its terminator followed by tail including its terminator.
    std::vector<uint8_t> joinedBytes(headBytes.begin(), headBytes.end() - 1);
    joinedBytes.insert(joinedBytes.end(), tailBytes.begin(), tailBytes.end());

    String *headStr = String::CreateFromMUtf8(headBytes.data(), headBytes.size() - 1, ctx, vm);
    String *tailStr = String::CreateFromMUtf8(tailBytes.data(), tailBytes.size() - 1, ctx, vm);
    String *expectedNarrow = String::CreateFromMUtf8(joinedBytes.data(), joinedBytes.size() - 1, ctx, vm);
    ASSERT_FALSE(headStr->IsUtf16());
    ASSERT_FALSE(tailStr->IsUtf16());
    String *actualNarrow = String::Concat(headStr, tailStr, ctx, vm);
    ASSERT_EQ(expectedNarrow->Compare(actualNarrow), 0);
    ASSERT_EQ(actualNarrow->Compare(expectedNarrow), 0);

    // --- utf8 + utf16 ---
    const std::vector<uint16_t> wideChars {'a', 'b', 0xab, 0xdc, 'z', 0};
    // headBytes followed by wideChars
    const std::vector<uint16_t> mixedChars {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    String *wideStr = String::CreateFromUtf16(wideChars.data(), wideChars.size() - 1, ctx, vm);
    String *expectedMixed = String::CreateFromUtf16(mixedChars.data(), mixedChars.size() - 1, ctx, vm);
    String *actualMixed = String::Concat(headStr, wideStr, ctx, vm);
    ASSERT_EQ(expectedMixed->GetLength(), actualMixed->GetLength());
    ASSERT_EQ(expectedMixed->Compare(actualMixed), 0);
    ASSERT_EQ(actualMixed->Compare(expectedMixed), 0);

    // --- utf16 + utf16 ---
    // Joined data: wideChars without its terminator followed by mixedChars including its terminator.
    std::vector<uint16_t> joinedWide(wideChars.begin(), wideChars.end() - 1);
    joinedWide.insert(joinedWide.end(), mixedChars.begin(), mixedChars.end());
    String *expectedWide = String::CreateFromUtf16(joinedWide.data(), joinedWide.size() - 1, ctx, vm);
    String *actualWide = String::Concat(wideStr, expectedMixed, ctx, vm);
    ASSERT_EQ(expectedWide->Compare(actualWide), 0);
    ASSERT_EQ(actualWide->Compare(expectedWide), 0);
}
854 
// Checks String::DoReplace: replacing 'Z' with 'A' in "ZBCDEFGHIJ" must give a string equal to "ABCDEFGHIJ".
//
// The character buffers are std::vector instances rather than new[]/delete[] pairs: a failing fatal
// assertion returns from the test body immediately, which would skip trailing delete[] calls and leak.
TEST_F(StringTest, DoReplaceTest0)
{
    static constexpr uint32_t STRING_LENGTH = 10;
    // One extra zero-initialized element serves as the terminator required by the
    // length-less CreateFromMUtf8 overload used below.
    std::vector<uint8_t> sourceChars(STRING_LENGTH + 1, 0);
    std::vector<uint8_t> expectedChars(STRING_LENGTH + 1, 0);

    for (uint32_t i = 0; i < STRING_LENGTH; i++) {
        const auto letter = static_cast<uint8_t>('A' + i);
        sourceChars[i] = letter;
        expectedChars[i] = letter;
    }
    // The source differs from the expected result only in its first character.
    sourceChars[0] = 'Z';

    String *sourceString =
        String::CreateFromMUtf8(sourceChars.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *expectedString =
        String::CreateFromMUtf8(expectedChars.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *replacedString =
        String::DoReplace(sourceString, 'Z', 'A', GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(replacedString, expectedString));
}
884 
// Checks String::FastSubString: taking 5 characters starting at index 1 of "ABCDEFGHIJ" must give a string
// equal to one created directly from those characters.
//
// The character buffers are std::vector instances rather than new[]/delete[] pairs: a failing fatal
// assertion returns from the test body immediately, which would skip trailing delete[] calls and leak.
TEST_F(StringTest, FastSubstringTest0)
{
    const uint32_t stringLength = 10;
    // One extra zero-initialized element serves as the terminator required by the
    // length-less CreateFromMUtf8 overload used below.
    std::vector<uint8_t> sourceChars(stringLength + 1, 0);
    for (uint32_t i = 0; i < stringLength; i++) {
        sourceChars[i] = static_cast<uint8_t>('A' + i);
    }

    const uint32_t subStringLength = 5;
    const uint32_t subStringStart = 1;
    // Expected substring: the selected slice of the source followed by its own terminator.
    std::vector<uint8_t> expectedChars(sourceChars.begin() + subStringStart,
                                       sourceChars.begin() + subStringStart + subStringLength);
    expectedChars.push_back(0);

    String *sourceString =
        String::CreateFromMUtf8(sourceChars.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *expectedString =
        String::CreateFromMUtf8(expectedChars.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *subString = String::FastSubString(sourceString, subStringStart, subStringLength, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(subString, expectedString));
}
917 
// Checks String::ToCharArray for a string created from MUtf8 data and for one created from Utf16 data:
// the returned array must have exactly as many elements as the string was created with, and each
// element must equal the corresponding input character.
//
// The length is asserted explicitly and the loops run over the expected length, so an empty or
// truncated array cannot pass vacuously and an oversized one cannot index past the input vector.
TEST_F(StringTest, ToCharArray)
{
    // utf8
    std::vector<uint8_t> data {'a', 'b', 'c', 'd', 'e', 0};
    // The trailing zero terminator is not part of the string.
    const auto expectedLength = static_cast<uint32_t>(data.size() - 1);
    String *utf8String =
        String::CreateFromMUtf8(data.data(), expectedLength, GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    Array *newArray = utf8String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray, nullptr);
    ASSERT_EQ(expectedLength, newArray->GetLength());
    for (uint32_t i = 0; i < expectedLength; ++i) {
        ASSERT_EQ(data[i], newArray->Get<uint16_t>(i));
    }

    // utf16
    std::vector<uint16_t> data1 {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    const auto expectedLength1 = static_cast<uint32_t>(data1.size() - 1);
    String *utf16String = String::CreateFromUtf16(data1.data(), expectedLength1, GetLanguageContext(),
                                                  Runtime::GetCurrent()->GetPandaVM());
    Array *newArray1 = utf16String->ToCharArray(GetLanguageContext());
    ASSERT_NE(newArray1, nullptr);
    ASSERT_EQ(expectedLength1, newArray1->GetLength());
    for (uint32_t i = 0; i < expectedLength1; ++i) {
        ASSERT_EQ(data1[i], newArray1->Get<uint16_t>(i));
    }
}
937 
// Checks String::CreateNewStringFromChars: a string built from a slice (offset 1, length 5) of a char array
// must equal a string created directly from the same slice of the original data.
TEST_F(StringTest, CreateNewStingFromCharArray)
{
    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Source characters; the char array under test is obtained from a string created from them.
    const std::vector<uint16_t> sourceChars {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *sourceString = String::CreateFromUtf16(sourceChars.data(), sourceChars.size() - 1, ctx, vm);
    Array *sourceArray = sourceString->ToCharArray(ctx);

    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    // Expected data: the selected slice followed by a zero terminator (excluded from the length below).
    std::vector<uint16_t> sliceChars(sourceChars.begin() + sliceOffset,
                                     sourceChars.begin() + sliceOffset + sliceLength);
    sliceChars.push_back(0);
    String *expectedString = String::CreateFromUtf16(sliceChars.data(), sliceChars.size() - 1, ctx, vm);

    String *actualString = String::CreateNewStringFromChars(sliceOffset, sliceLength, sourceArray, ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actualString, expectedString));
}
960 
// Checks String::CreateNewStringFromBytes: a string built from a slice (offset 1, length 5) of a byte array
// with a zero high byte must equal a string created from the same bytes widened to 16-bit values.
TEST_F(StringTest, CreateNewStingFromByteArray)
{
    const std::vector<uint8_t> sourceBytes {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    const uint32_t sliceLength = 5;
    const uint32_t sliceOffset = 1;
    const uint32_t highByte = 0;
    // Number of bytes copied into the array: everything except the trailing zero.
    const size_t payloadSize = sourceBytes.size() - 1;

    const auto ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Expected characters: highByte in the upper 8 bits, the source byte in the lower 8 bits.
    // No terminator is appended because the exact length is passed to the factory.
    std::vector<uint16_t> expectedChars;
    expectedChars.reserve(sliceLength);
    for (uint32_t i = 0; i < sliceLength; ++i) {
        expectedChars.push_back((highByte << 8U) | (sourceBytes[sliceOffset + i] & 0xFFU));
    }
    String *expectedString = String::CreateFromUtf16(expectedChars.data(), sliceLength, ctx, vm);

    // Fill an array of class root ARRAY_I8 with the payload bytes.
    Class *byteArrayClass =
        Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ark::ClassRoot::ARRAY_I8);
    Array *byteArray = Array::Create(byteArrayClass, payloadSize);
    const Span<const uint8_t> payload(sourceBytes.data(), payloadSize);
    for (uint32_t i = 0; i < payloadSize; i++) {
        byteArray->Set<uint8_t>(i, payload[i]);
    }

    String *actualString = String::CreateNewStringFromBytes(sliceOffset, sliceLength, highByte, byteArray, ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actualString, expectedString));
}
989 
990 }  // namespace ark::coretypes::test
991 
992 // NOLINTEND(readability-magic-numbers)
993