1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <ctime>
17
18 #include "gtest/gtest.h"
19 #include "libpandabase/utils/span.h"
20 #include "libpandabase/utils/utf.h"
21 #include "runtime/include/class_linker_extension.h"
22 #include "runtime/include/coretypes/array-inl.h"
23 #include "runtime/include/coretypes/string-inl.h"
24 #include "runtime/include/runtime.h"
25 #include "runtime/include/thread.h"
26
27 namespace panda::coretypes::test {
28
29 class StringTest : public testing::Test {
30 public:
StringTest()31 StringTest()
32 {
33 // Logger::InitializeStdLogging(Logger::Level::DEBUG, Logger::Component::ALL);
34 #ifdef PANDA_NIGHTLY_TEST_ON
35 seed_ = std::time(NULL);
36 #else
37 seed_ = 0xDEADBEEF;
38 #endif
39 srand(seed_);
40 // We need to create a runtime instance to be able to create strings.
41 options_.SetShouldLoadBootPandaFiles(false);
42 options_.SetShouldInitializeIntrinsics(false);
43 Runtime::Create(options_);
44 }
45
~StringTest()46 ~StringTest()
47 {
48 Runtime::Destroy();
49 // Logger::Destroy();
50 }
51
GetLanguageContext()52 LanguageContext GetLanguageContext()
53 {
54 return Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
55 }
56
SetUp()57 void SetUp() override
58 {
59 thread_ = panda::MTManagedThread::GetCurrent();
60 thread_->ManagedCodeBegin();
61 }
62
TearDown()63 void TearDown() override
64 {
65 thread_->ManagedCodeEnd();
66 }
67
68 protected:
69 panda::MTManagedThread *thread_ {nullptr};
70 static constexpr uint32_t SIMPLE_UTF8_STRING_LENGTH = 13;
71 static constexpr char SIMPLE_UTF8_STRING[SIMPLE_UTF8_STRING_LENGTH + 1] = "Hello, world!";
72 unsigned seed_;
73 RuntimeOptions options_;
74 };
75
// A string built from single-byte MUTF-8 data must compare equal to that same raw data.
TEST_F(StringTest, EqualStringWithCompressedRawUtf8Data)
{
    std::vector<uint8_t> raw_bytes {0x01, 0x05, 0x07, 0x00};
    // The trailing zero byte is not part of the string.
    const auto char_count = static_cast<uint32_t>(raw_bytes.size() - 1);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(raw_bytes.data(), char_count, GetLanguageContext(), vm);

    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, raw_bytes.data(), char_count));
}
84
// A string built from MUTF-8 data starting with a two-byte sequence must compare equal to that raw data.
TEST_F(StringTest, EqualStringWithNotCompressedRawUtf8Data)
{
    // Two-byte sequence followed by twenty consecutive single-byte values starting at 0x30.
    std::vector<uint8_t> raw_bytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < 20; ++offset) {
        raw_bytes.push_back(0x30 + offset);
    }
    raw_bytes.push_back(0);

    // Subtract the trailing zero and the second byte of the leading two-byte sequence.
    const auto char_count = static_cast<uint32_t>(raw_bytes.size() - 2);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(raw_bytes.data(), char_count, GetLanguageContext(), vm);

    ASSERT_TRUE(String::StringsAreEqualMUtf8(created, raw_bytes.data(), char_count));
}
99
// Two MUTF-8 sequences that differ only in their last character must not compare equal.
TEST_F(StringTest, NotEqualStringWithNotCompressedRawUtf8Data)
{
    std::vector<uint8_t> source_bytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> other_bytes {0xc2, 0xa7, 0x34, 0x00};
    const uint32_t char_count = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(source_bytes.data(), char_count, GetLanguageContext(), vm);

    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, other_bytes.data(), char_count));
}
109
// A string created from data with a two-byte sequence must differ from raw single-byte data.
TEST_F(StringTest, NotEqualStringNotCompressedStringWithCompressedRawData)
{
    std::vector<uint8_t> multi_byte_data {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> single_byte_data {0x02, 0x07, 0x04, 0x00};
    const uint32_t char_count = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(multi_byte_data.data(), char_count, GetLanguageContext(), vm);

    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, single_byte_data.data(), char_count));
}
119
// A string created from single-byte data must differ from raw data containing a two-byte sequence.
TEST_F(StringTest, NotEqualCompressedStringWithUncompressedRawUtf8Data)
{
    std::vector<uint8_t> single_byte_data {0x02, 0x07, 0x04, 0x00};
    std::vector<uint8_t> multi_byte_data {0xc2, 0xa7, 0x33, 0x00};
    const uint32_t char_count = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(single_byte_data.data(), char_count, GetLanguageContext(), vm);

    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, multi_byte_data.data(), char_count));
}
129
// Comparing a two-character string against raw data that is one character shorter must fail.
TEST_F(StringTest, EqualStringWithMUtf8DifferentLength)
{
    std::vector<uint8_t> longer_bytes {0xc2, 0xa7, 0x33, 0x00};
    std::vector<uint8_t> shorter_bytes {0xc2, 0xa7, 0x00};
    const uint32_t char_count = 2;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(longer_bytes.data(), char_count, GetLanguageContext(), vm);

    ASSERT_FALSE(String::StringsAreEqualMUtf8(created, shorter_bytes.data(), char_count - 1));
}
139
// A string created from UTF-16 code units must compare equal to those same code units.
TEST_F(StringTest, EqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // The whole vector, including the final zero unit, is used as string content.
    String *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);
    const uint16_t *raw_units = units.data();

    ASSERT_TRUE(String::StringsAreEqualUtf16(created, raw_units, units.size()));
}
148
// A string created from small UTF-16 code units (1..30) must compare equal to the raw units.
TEST_F(StringTest, CompareCompressedStringWithRawUtf16)
{
    std::vector<uint16_t> units;
    for (uint16_t code = 1; code <= 30; ++code) {
        units.push_back(code);
    }
    units.push_back(0);

    // The trailing zero unit is excluded from the string.
    const size_t unit_count = units.size() - 1;
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromUtf16(units.data(), unit_count, GetLanguageContext(), vm);
    const uint16_t *raw_units = units.data();

    ASSERT_TRUE(String::StringsAreEqualUtf16(created, raw_units, unit_count));
}
163
// A three-unit string must not compare equal to four raw UTF-16 units sharing the same prefix.
TEST_F(StringTest, EqualStringWithRawUtf16DifferentLength)
{
    std::vector<uint16_t> shorter_units {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> longer_units {0xffc3, 0x33, 0x55, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created =
        String::CreateFromUtf16(shorter_units.data(), shorter_units.size(), GetLanguageContext(), vm);
    const uint16_t *raw_units = longer_units.data();

    ASSERT_FALSE(String::StringsAreEqualUtf16(created, raw_units, longer_units.size()));
}
173
// Same-length UTF-16 sequences that differ in one unit must not compare equal.
TEST_F(StringTest, NotEqualStringWithRawUtf16Data)
{
    std::vector<uint16_t> source_units {0xffc3, 0x33, 0x00};
    std::vector<uint16_t> other_units {0xffc3, 0x34, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromUtf16(source_units.data(), source_units.size(), GetLanguageContext(), vm);
    const uint16_t *raw_units = other_units.data();

    ASSERT_FALSE(String::StringsAreEqualUtf16(created, raw_units, other_units.size()));
}
184
// The hash code of a string built from SIMPLE_UTF8_STRING must match the hash computed on the raw bytes.
TEST_F(StringTest, compressedHashCodeUtf8)
{
    const auto *raw_bytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(raw_bytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);

    auto hash_from_string = created->GetHashcode();
    auto hash_from_raw = String::ComputeHashcodeMutf8(raw_bytes, SIMPLE_UTF8_STRING_LENGTH);
    ASSERT_EQ(hash_from_string, hash_from_raw);
}
// The hash code of a string built from MUTF-8 data with a two-byte sequence must match the raw-data hash.
TEST_F(StringTest, notCompressedHashCodeUtf8)
{
    std::vector<uint8_t> raw_bytes {0xc2, 0xa7};
    for (size_t offset = 0; offset < 20; ++offset) {
        raw_bytes.push_back(0x30 + offset);
    }
    // One character for the leading two-byte sequence plus one per appended byte.
    const size_t char_count = raw_bytes.size() - 1;
    raw_bytes.push_back(0);

    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromMUtf8(raw_bytes.data(), char_count, GetLanguageContext(), vm);

    auto hash_from_string = created->GetHashcode();
    auto hash_from_raw = String::ComputeHashcodeMutf8(raw_bytes.data(), char_count);
    ASSERT_EQ(hash_from_string, hash_from_raw);
}
214
// The hash code of a string built from UTF-16 units 1..30 plus a zero unit must match the raw-data hash.
TEST_F(StringTest, compressedHashCodeUtf16)
{
    std::vector<uint16_t> units;
    const size_t generated = 30;
    for (size_t idx = 0; idx < generated; ++idx) {
        units.push_back(idx + 1);
    }
    units.push_back(0);

    // The full vector, including the final zero unit, is passed as string content.
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);

    auto hash_from_string = created->GetHashcode();
    auto hash_from_raw = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hash_from_string, hash_from_raw);
}
231
// The hash code of a string built from UTF-16 units including 0xffc3 must match the raw-data hash.
TEST_F(StringTest, notCompressedHashCodeUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);

    auto hash_from_string = created->GetHashcode();
    auto hash_from_raw = String::ComputeHashcodeUtf16(units.data(), units.size());
    ASSERT_EQ(hash_from_string, hash_from_raw);
}
241
// GetLength() must report the character count passed when creating a string from MUTF-8 data.
TEST_F(StringTest, lengthUtf8)
{
    const auto *raw_bytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromMUtf8(raw_bytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);

    ASSERT_EQ(created->GetLength(), SIMPLE_UTF8_STRING_LENGTH);
}
249
// GetLength() must report the number of code units passed when creating a string from UTF-16 data.
TEST_F(StringTest, lengthUtf16)
{
    std::vector<uint16_t> units {0xffc3, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *created = String::CreateFromUtf16(units.data(), units.size(), GetLanguageContext(), vm);

    ASSERT_EQ(created->GetLength(), units.size());
}
257
// A string and its strict prefix have different lengths and must not be reported as equal.
TEST_F(StringTest, DifferentLengthStringCompareTest)
{
    static constexpr uint32_t prefix_length = 8;
    static constexpr char prefix_text[prefix_length + 1] = "Hello, w";
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const auto *full_bytes = reinterpret_cast<const uint8_t *>(SIMPLE_UTF8_STRING);
    String *full_string = String::CreateFromMUtf8(full_bytes, SIMPLE_UTF8_STRING_LENGTH, GetLanguageContext(), vm);
    ASSERT_EQ(full_string->GetLength(), SIMPLE_UTF8_STRING_LENGTH);

    const auto *prefix_bytes = reinterpret_cast<const uint8_t *>(prefix_text);
    String *prefix_string = String::CreateFromMUtf8(prefix_bytes, prefix_length, GetLanguageContext(), vm);
    ASSERT_EQ(prefix_string->GetLength(), prefix_length);

    ASSERT_FALSE(String::StringsAreEqual(full_string, prefix_string));
}
271
// Length and copy-out checks for a string whose last character is U+0000 encoded as 0xc0 0x80.
TEST_F(StringTest, ForeignLengthAndCopyTest1b0)
{
    std::vector<uint8_t> mutf8_bytes {'a', 'b', 'c', 'd', 'z', 0xc0, 0x80, 0x00};
    const uint32_t byte_count = mutf8_bytes.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // 0xc0 0x80 is U+0000: two bytes make one character, and the final zero is the terminator.
    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), byte_count - 2, GetLanguageContext(), vm);
    ASSERT_EQ(string->GetMUtf8Length(), mutf8_bytes.size());
    ASSERT_EQ(string->GetUtf16Length(), mutf8_bytes.size() - 2);  // the terminator does not count for UTF-16

    // Round trip back to MUTF-8.
    std::vector<uint8_t> copied_mutf8(mutf8_bytes.size());
    ASSERT_EQ(string->CopyDataMUtf8(copied_mutf8.data(), copied_mutf8.size(), true), mutf8_bytes.size());
    ASSERT_EQ(copied_mutf8, mutf8_bytes);

    // Copy out as UTF-16 code units.
    std::vector<uint16_t> expected_utf16 {'a', 'b', 'c', 'd', 'z', 0x00};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataUtf16(copied_utf16.data(), copied_utf16.size()), expected_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
288
// Length and copy-out checks for a string made only of single-byte characters (up to 0x7f).
TEST_F(StringTest, ForeignLengthAndCopyTest1b)
{
    std::vector<uint8_t> mutf8_bytes {'a', 'b', 'c', 'd', 'z', 0x7f, 0x00};
    const uint32_t byte_count = mutf8_bytes.size();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), byte_count - 1, GetLanguageContext(), vm);
    ASSERT_EQ(string->GetMUtf8Length(), mutf8_bytes.size());
    ASSERT_EQ(string->GetUtf16Length(), mutf8_bytes.size() - 1);  // the terminator does not count for UTF-16

    // Round trip back to MUTF-8.
    std::vector<uint8_t> copied_mutf8(mutf8_bytes.size());
    ASSERT_EQ(string->CopyDataMUtf8(copied_mutf8.data(), copied_mutf8.size(), true), mutf8_bytes.size());
    ASSERT_EQ(copied_mutf8, mutf8_bytes);

    // Copy out as UTF-16 code units.
    std::vector<uint16_t> expected_utf16 {'a', 'b', 'c', 'd', 'z', 0x7f};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataUtf16(copied_utf16.data(), copied_utf16.size()), expected_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
305
// Length and copy-out checks for a string that starts with a two-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest2b)
{
    // Two UTF-16 code units: 0xa7 (from 0xc2 0xa7) and 0x33.
    std::vector<uint8_t> mutf8_bytes {0xc2, 0xa7, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), 2, GetLanguageContext(), vm);
    ASSERT_EQ(string->GetMUtf8Length(), mutf8_bytes.size());
    ASSERT_EQ(string->GetUtf16Length(), 2);  // the terminator does not count for UTF-16

    // Round trip back to MUTF-8.
    std::vector<uint8_t> copied_mutf8(mutf8_bytes.size());
    ASSERT_EQ(string->CopyDataMUtf8(copied_mutf8.data(), copied_mutf8.size(), true), mutf8_bytes.size());
    ASSERT_EQ(copied_mutf8, mutf8_bytes);

    // Copy out as UTF-16 code units.
    std::vector<uint16_t> expected_utf16 {0xa7, 0x33};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataUtf16(copied_utf16.data(), copied_utf16.size()), expected_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
320
// Length and copy-out checks for a string that starts with a three-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest3b)
{
    // Two UTF-16 code units: 0xffc3 (from 0xef 0xbf 0x83) and 0x33.
    std::vector<uint8_t> mutf8_bytes {0xef, 0xbf, 0x83, 0x33, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), 2, GetLanguageContext(), vm);
    ASSERT_EQ(string->GetMUtf8Length(), mutf8_bytes.size());
    ASSERT_EQ(string->GetUtf16Length(), 2);  // the terminator does not count for UTF-16

    // Round trip back to MUTF-8.
    std::vector<uint8_t> copied_mutf8(mutf8_bytes.size());
    ASSERT_EQ(string->CopyDataMUtf8(copied_mutf8.data(), copied_mutf8.size(), true), mutf8_bytes.size());
    ASSERT_EQ(copied_mutf8, mutf8_bytes);

    // Copy out as UTF-16 code units.
    std::vector<uint16_t> expected_utf16 {0xffc3, 0x33};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataUtf16(copied_utf16.data(), copied_utf16.size()), expected_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
335
// Length and copy-out checks for a surrogate pair given as a six-byte MUTF-8 sequence.
TEST_F(StringTest, ForeignLengthAndCopyTest6b)
{
    // Three UTF-16 code units: the pair {0xd801, 0xdc37} followed by 0x20.
    std::vector<uint8_t> mutf8_bytes {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x20, 0x00};
    // We support 4-byte utf-8 sequences, so {0xd801, 0xdc37} is encoded to 4 bytes instead of 6
    std::vector<uint8_t> expected_utf8 {0xf0, 0x90, 0x90, 0xb7, 0x20, 0x00};
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), 3, GetLanguageContext(), vm);
    ASSERT_EQ(string->GetMUtf8Length(), expected_utf8.size());
    ASSERT_EQ(string->GetUtf16Length(), 3);  // the terminator does not count for UTF-16

    // The return value of the copy is deliberately not checked here, only the copied bytes.
    std::vector<uint8_t> copied_utf8(expected_utf8.size());
    string->CopyDataMUtf8(copied_utf8.data(), copied_utf8.size(), true);
    ASSERT_EQ(copied_utf8, expected_utf8);

    // Copy out as UTF-16 code units.
    std::vector<uint16_t> expected_utf16 {0xd801, 0xdc37, 0x20};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataUtf16(copied_utf16.data(), copied_utf16.size()), expected_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
352
// Copies the region "cd" out of "abcdz" both as MUTF-8 bytes and as UTF-16 code units.
TEST_F(StringTest, RegionCopyTestMutf8)
{
    std::vector<uint8_t> mutf8_bytes {'a', 'b', 'c', 'd', 'z', 0x00};
    const auto char_count = static_cast<uint32_t>(mutf8_bytes.size() - 1);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), char_count, GetLanguageContext(), vm);

    const size_t start = 2;

    // MUTF-8 region: the destination keeps one extra slot that the test zero-fills itself.
    const size_t mutf8_len = string->GetMUtf8Length();
    const size_t mutf8_region = mutf8_len - start - 1 - 1;
    std::vector<uint8_t> expected_mutf8 = {'c', 'd', 0x00};
    std::vector<uint8_t> copied_mutf8(expected_mutf8.size());
    ASSERT_EQ(string->CopyDataRegionMUtf8(copied_mutf8.data(), start, mutf8_region, copied_mutf8.size()),
              copied_mutf8.size() - 1);
    copied_mutf8[copied_mutf8.size() - 1] = '\0';
    ASSERT_EQ(copied_mutf8, expected_mutf8);

    // UTF-16 region.
    const size_t utf16_len = string->GetUtf16Length();
    const size_t utf16_region = utf16_len - start - 1;
    std::vector<uint16_t> expected_utf16 = {'c', 'd'};
    std::vector<uint16_t> copied_utf16(expected_utf16.size());
    ASSERT_EQ(string->CopyDataRegionUtf16(copied_utf16.data(), start, utf16_region, copied_utf16.size()),
              copied_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
372
// Copies the region "cdz" out of a string that also contains a two-byte MUTF-8 character.
TEST_F(StringTest, RegionCopyTestUtf16)
{
    std::vector<uint8_t> mutf8_bytes {'a', 'b', 'c', 'd', 'z', 0xc2, 0xa7, 0x00};
    // Subtract the terminator and the second byte of the 0xc2 0xa7 sequence.
    const auto char_count = static_cast<uint32_t>(mutf8_bytes.size() - 1 - 1);
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    String *string = String::CreateFromMUtf8(mutf8_bytes.data(), char_count, GetLanguageContext(), vm);

    const size_t start = 2;
    const size_t region = 3;

    // MUTF-8 region: the destination keeps one extra slot that the test zero-fills itself.
    std::vector<uint8_t> expected_mutf8 = {'c', 'd', 'z', 0x00};
    std::vector<uint8_t> copied_mutf8(expected_mutf8.size());
    ASSERT_EQ(string->CopyDataRegionMUtf8(copied_mutf8.data(), start, region, copied_mutf8.size()),
              copied_mutf8.size() - 1);
    copied_mutf8[copied_mutf8.size() - 1] = '\0';
    ASSERT_EQ(copied_mutf8, expected_mutf8);

    // UTF-16 region: the destination size is derived from the string's UTF-16 length.
    const size_t utf16_len = string->GetUtf16Length();
    std::vector<uint16_t> copied_utf16(utf16_len - start - 1);
    std::vector<uint16_t> expected_utf16 = {'c', 'd', 'z'};
    ASSERT_EQ(string->CopyDataRegionUtf16(copied_utf16.data(), start, region, copied_utf16.size()),
              copied_utf16.size());
    ASSERT_EQ(copied_utf16, expected_utf16);
}
391
// Compares equally long strings built from pseudo-random bytes, in the forms produced by
// CreateFromMUtf8 and by CreateFromUtf16.
TEST_F(StringTest, SameLengthStringCompareTest)
{
    static constexpr uint32_t string_length = 10;
    // The buffers are owned by vectors so they are released even when a fatal ASSERT_* returns
    // from the test early (raw new[]/delete[] would leak on that path).
    std::vector<char> f_string(string_length + 1);
    std::vector<char> s_string(string_length + 1);

    for (uint32_t i = 0; i < string_length; i++) {
        // Hack for ConvertMUtf8ToUtf16 call.
        // We should use char from 0x7f to 0x0 if we want to
        // generate one utf16 (0x00xx) from this mutf8.
        uint8_t val1 = rand();
        val1 = val1 >> 1;
        if (val1 == 0) {
            val1++;
        }

        uint8_t val2 = rand();
        val2 = val2 >> 1;
        if (val2 == 0) {
            val2++;
        }

        f_string[i] = val1;
        s_string[i] = val2;
    }
    // Overwrite the last two bytes with 0x80 0x01 to disable compressing.
    // The two MUTF-8 bytes count as one UTF-16 unit, so length = string_length - 1.
    f_string[string_length - 2] = uint8_t(0x80);
    s_string[string_length - 2] = uint8_t(0x80);
    f_string[string_length - 1] = uint8_t(0x01);
    s_string[string_length - 1] = uint8_t(0x01);
    f_string[string_length] = '\0';
    s_string[string_length] = '\0';

    String *first_utf16_string =
        String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(f_string.data()), string_length - 1,
                                GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    // Try to use function with automatic length detection
    String *second_utf16_string = String::CreateFromMUtf8(reinterpret_cast<const uint8_t *>(s_string.data()),
                                                          GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(first_utf16_string->GetLength(), string_length - 1);
    ASSERT_EQ(second_utf16_string->GetLength(), string_length - 1);

    // Dirty hack to not create utf16 for our purpose, just reuse old one
    // Try to create compressed strings.
    String *first_utf8_string = String::CreateFromUtf16(first_utf16_string->GetDataUtf16(), string_length - 1,
                                                        GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *second_utf8_string = String::CreateFromUtf16(first_utf16_string->GetDataUtf16(), string_length - 1,
                                                         GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_EQ(first_utf8_string->GetLength(), string_length - 1);
    ASSERT_EQ(second_utf8_string->GetLength(), string_length - 1);

    ASSERT_EQ(String::StringsAreEqual(first_utf16_string, second_utf16_string),
              strcmp(f_string.data(), s_string.data()) == 0);
    ASSERT_EQ(String::StringsAreEqual(first_utf16_string, second_utf8_string),
              first_utf16_string->IsUtf16() == second_utf8_string->IsUtf16());
    ASSERT_EQ(String::StringsAreEqual(first_utf8_string, second_utf8_string), true);
    ASSERT_TRUE(first_utf16_string->IsUtf16());
    ASSERT_TRUE(String::StringsAreEqualUtf16(first_utf16_string, first_utf16_string->GetDataUtf16(),
                                             first_utf16_string->GetLength()));
}
454
// ObjectSize() must agree with the size computed by the matching ComputeSize* helper.
TEST_F(StringTest, ObjectSize)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Case 1: expected size comes from ComputeSizeMUtf8.
    {
        std::vector<uint8_t> ascii_bytes {'1', '2', '3', '4', '5', 0x00};
        const uint32_t length = ascii_bytes.size();
        String *created = String::CreateFromMUtf8(ascii_bytes.data(), length, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeMUtf8(length));
    }

    // Case 2: expected size comes from ComputeSizeUtf16.
    {
        std::vector<uint8_t> wide_bytes {0x80, 0x01, 0x80, 0x02, 0x00};
        const uint32_t length = wide_bytes.size() / 2;
        String *created = String::CreateFromMUtf8(wide_bytes.data(), length, GetLanguageContext(), vm);
        ASSERT_EQ(created->ObjectSize(), String::ComputeSizeUtf16(length));
    }
}
473
// At(i) must return the i-th source element for strings created from MUTF-8 and from UTF-16 data.
TEST_F(StringTest, AtTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Created from MUTF-8 bytes: expected not to be stored as UTF-16.
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    const uint32_t count1 = data1.size() - 1;
    String *string = String::CreateFromMUtf8(data1.data(), count1, GetLanguageContext(), vm);
    ASSERT_FALSE(string->IsUtf16());
    for (uint32_t pos = 0; pos < count1; ++pos) {
        ASSERT_EQ(data1[pos], string->At(pos));
    }

    // Created from UTF-16 units including 0xab and 0xdc: expected to be stored as UTF-16.
    std::vector<uint16_t> data2 {'a', 'b', 0xab, 0xdc, 'z', 0};
    const uint32_t count2 = data2.size() - 1;
    string = String::CreateFromUtf16(data2.data(), count2, GetLanguageContext(), vm);
    ASSERT_TRUE(string->IsUtf16());
    for (uint32_t pos = 0; pos < count2; ++pos) {
        ASSERT_EQ(data2[pos], string->At(pos));
    }

    // Created from UTF-16 units with small values only: expected not to be stored as UTF-16.
    std::vector<uint16_t> data3 {'a', 'b', 121, 122, 'z', 0};
    const uint32_t count3 = data3.size() - 1;
    string = String::CreateFromUtf16(data3.data(), count3, GetLanguageContext(), vm);
    ASSERT_FALSE(string->IsUtf16());
    for (uint32_t pos = 0; pos < count3; ++pos) {
        ASSERT_EQ(data3[pos], string->At(pos));
    }
}
503
// IndexOf() must give the same answer whether the haystack and the needle were created from
// MUTF-8 bytes or from UTF-16 code units with the same content.
TEST_F(StringTest, IndexOfTest)
{
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'b', 'c', 'd', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'b', 'c', 'd', 0};
    String *string1 = String::CreateFromMUtf8(data1.data(), data1.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string2 = String::CreateFromMUtf8(data2.data(), data2.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string3 = String::CreateFromUtf16(data3.data(), data3.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());
    String *string4 = String::CreateFromUtf16(data4.data(), data4.size() - 1, GetLanguageContext(),
                                              Runtime::GetCurrent()->GetPandaVM());

    // Search starting at position 1. The debug printing of the result was dropped: it only
    // cluttered the test output and used std::cout without this file including <iostream>.
    auto index = string1->IndexOf(string2, 1);
    auto index1 = string1->IndexOf(string4, 1);
    auto index2 = string3->IndexOf(string2, 1);
    auto index3 = string3->IndexOf(string4, 1);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);

    // Search starting at position 2.
    index = string1->IndexOf(string2, 2);
    index1 = string1->IndexOf(string4, 2);
    index2 = string3->IndexOf(string2, 2);
    index3 = string3->IndexOf(string4, 2);
    ASSERT_EQ(index, index2);
    ASSERT_EQ(index1, index3);
}
534
// Exercises String::Compare() across short and long strings created from MUTF-8 and UTF-16 data,
// checking the sign of the result in both directions and that equal content compares as 0.
TEST_F(StringTest, CompareTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();
    // Both helpers leave the trailing zero element out of the string length.
    auto from_mutf8 = [this, vm](std::vector<uint8_t> &bytes) {
        return String::CreateFromMUtf8(bytes.data(), bytes.size() - 1, GetLanguageContext(), vm);
    };
    auto from_utf16 = [this, vm](std::vector<uint16_t> &units) {
        return String::CreateFromUtf16(units.data(), units.size() - 1, GetLanguageContext(), vm);
    };

    // Short strings that are all expected not to be stored as UTF-16.
    std::vector<uint8_t> data1 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint8_t> data2 {'a', 'b', 'c', 'd', 'z', 'x', 0};
    std::vector<uint16_t> data3 {'a', 'b', 'c', 'd', 'z', 0};
    std::vector<uint16_t> data4 {'a', 'b', 'd', 'c', 'z', 0};
    String *string1 = from_mutf8(data1);
    String *string2 = from_mutf8(data2);
    String *string3 = from_utf16(data3);
    String *string4 = from_utf16(data4);
    ASSERT_FALSE(string1->IsUtf16());
    ASSERT_FALSE(string2->IsUtf16());
    ASSERT_FALSE(string3->IsUtf16());
    ASSERT_FALSE(string4->IsUtf16());
    ASSERT_LT(string1->Compare(string2), 0);
    ASSERT_GT(string2->Compare(string1), 0);
    ASSERT_EQ(string1->Compare(string3), 0);
    ASSERT_EQ(string3->Compare(string1), 0);
    ASSERT_LT(string2->Compare(string4), 0);
    ASSERT_GT(string4->Compare(string2), 0);

    // Non-UTF-16 strings against a UTF-16 string.
    std::vector<uint16_t> data5 {'a', 'b', 0xab, 0xdc, 'z', 0};
    String *string5 = from_utf16(data5);
    ASSERT_TRUE(string5->IsUtf16());
    ASSERT_LT(string2->Compare(string5), 0);
    ASSERT_GT(string5->Compare(string2), 0);
    ASSERT_LT(string4->Compare(string5), 0);
    ASSERT_GT(string5->Compare(string4), 0);

    // UTF-16 strings against each other; string6 and string7 share the same source data.
    std::vector<uint16_t> data6 {'a', 0xab, 0xab, 0};
    String *string6 = from_utf16(data6);
    String *string7 = from_utf16(data6);
    ASSERT_TRUE(string6->IsUtf16());
    ASSERT_TRUE(string7->IsUtf16());
    ASSERT_LT(string5->Compare(string6), 0);
    ASSERT_GT(string6->Compare(string5), 0);
    ASSERT_EQ(string6->Compare(string7), 0);
    ASSERT_EQ(string7->Compare(string6), 0);

    // Every string compares equal to itself.
    ASSERT_EQ(string1->Compare(string1), 0);
    ASSERT_EQ(string2->Compare(string2), 0);
    ASSERT_EQ(string3->Compare(string3), 0);
    ASSERT_EQ(string4->Compare(string4), 0);
    ASSERT_EQ(string5->Compare(string5), 0);
    ASSERT_EQ(string6->Compare(string6), 0);
    ASSERT_EQ(string7->Compare(string7), 0);

    // Longer MUTF-8 inputs: sixteen 'a' characters followed by differing tails.
    std::vector<uint8_t> data8(16, 'a');
    data8.push_back(0);

    std::vector<uint8_t> data9(16, 'a');
    data9.insert(data9.end(), {'x', 'z'});
    data9.push_back(0);

    std::vector<uint8_t> data10(16, 'a');
    data10.insert(data10.end(), {'x', 'x', 'x', 'y', 'y', 'a', 'a'});
    data10.insert(data10.end(), 16, 'a');
    data10.push_back(0);

    std::vector<uint8_t> data11(16, 'a');
    data11.insert(data11.end(), {'x', 'x', 'x', 'y', 'y', 'y', 'y'});
    data11.insert(data11.end(), 16, 'a');
    data11.push_back(0);

    String *string8 = from_mutf8(data8);
    String *string9 = from_mutf8(data9);
    String *string10 = from_mutf8(data10);
    String *string11 = from_mutf8(data11);
    String *string12 = from_mutf8(data8);
    String *string13 = from_mutf8(data9);

    ASSERT_EQ(string8->Compare(string12), 0);
    ASSERT_EQ(string12->Compare(string8), 0);
    ASSERT_EQ(string9->Compare(string13), 0);
    ASSERT_EQ(string13->Compare(string9), 0);
    ASSERT_LT(string10->Compare(string11), 0);
    ASSERT_GT(string11->Compare(string10), 0);
    ASSERT_LT(string10->Compare(string9), 0);
    ASSERT_GT(string9->Compare(string10), 0);

    // Longer UTF-16 inputs: sixteen 0xab units followed by differing tails.
    std::vector<uint16_t> data14(16, 0xab);
    data14.push_back(0);

    std::vector<uint16_t> data15(16, 0xab);
    data15.insert(data15.end(), {'a', 0xbb});
    data15.push_back(0);

    std::vector<uint16_t> data16(16, 0xab);
    data16.insert(data16.end(), {'a', 'a', 0xcc, 0xcc, 0xdd, 0xdd, 0xdd});
    data16.insert(data16.end(), 16, 0xab);
    data16.push_back(0);

    std::vector<uint16_t> data17(16, 0xab);
    data17.insert(data17.end(), {'a', 'a', 0xdd, 0xdd, 0xdd, 0xdd, 0xdd});
    data17.insert(data17.end(), 16, 0xab);
    data17.push_back(0);

    String *string14 = from_utf16(data14);
    String *string15 = from_utf16(data15);
    String *string16 = from_utf16(data16);
    String *string17 = from_utf16(data17);
    String *string18 = from_utf16(data14);
    String *string19 = from_utf16(data15);

    ASSERT_EQ(string14->Compare(string18), 0);
    ASSERT_EQ(string18->Compare(string14), 0);
    ASSERT_EQ(string15->Compare(string19), 0);
    ASSERT_EQ(string19->Compare(string15), 0);
    ASSERT_LT(string16->Compare(string17), 0);
    ASSERT_GT(string17->Compare(string16), 0);
    ASSERT_LT(string16->Compare(string15), 0);
    ASSERT_GT(string15->Compare(string16), 0);
}
683
// String::Concat() must produce a string that compares equal to one created directly from the
// concatenated source data, for each combination of MUTF-8-created and UTF-16-created operands.
TEST_F(StringTest, ConcatTest)
{
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // MUTF-8 + MUTF-8: the expected data is data1 without its terminator followed by all of data2.
    std::vector<uint8_t> data1 {'f', 'g', 'h', 0};
    std::vector<uint8_t> data2 {'a', 'b', 'c', 'd', 'e', 0};
    std::vector<uint8_t> data3(data1.begin(), data1.end() - 1);
    data3.insert(data3.end(), data2.begin(), data2.end());

    String *string1 = String::CreateFromMUtf8(data1.data(), data1.size() - 1, GetLanguageContext(), vm);
    String *string2 = String::CreateFromMUtf8(data2.data(), data2.size() - 1, GetLanguageContext(), vm);
    String *string30 = String::CreateFromMUtf8(data3.data(), data3.size() - 1, GetLanguageContext(), vm);
    ASSERT_FALSE(string1->IsUtf16());
    ASSERT_FALSE(string2->IsUtf16());
    String *string31 = String::Concat(string1, string2, GetLanguageContext(), vm);
    ASSERT_EQ(string30->Compare(string31), 0);
    ASSERT_EQ(string31->Compare(string30), 0);

    // MUTF-8 + UTF-16: data5 is the content of data1 followed by data4.
    std::vector<uint16_t> data4 {'a', 'b', 0xab, 0xdc, 'z', 0};
    std::vector<uint16_t> data5 {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    String *string4 = String::CreateFromUtf16(data4.data(), data4.size() - 1, GetLanguageContext(), vm);
    String *string50 = String::CreateFromUtf16(data5.data(), data5.size() - 1, GetLanguageContext(), vm);
    String *string51 = String::Concat(string1, string4, GetLanguageContext(), vm);
    ASSERT_EQ(string50->GetLength(), string51->GetLength());
    ASSERT_EQ(string50->Compare(string51), 0);
    ASSERT_EQ(string51->Compare(string50), 0);

    // UTF-16 + UTF-16: the expected data is data4 without its terminator followed by all of data5.
    std::vector<uint16_t> data6(data4.begin(), data4.end() - 1);
    data6.insert(data6.end(), data5.begin(), data5.end());
    String *string60 = String::CreateFromUtf16(data6.data(), data6.size() - 1, GetLanguageContext(), vm);
    String *string61 = String::Concat(string4, string50, GetLanguageContext(), vm);
    ASSERT_EQ(string60->Compare(string61), 0);
    ASSERT_EQ(string61->Compare(string60), 0);
}
727
// Verifies String::DoReplace on a MUtf8 string: replacing 'Z' with 'A' in "ZBCDEFGHIJ"
// must produce a string equal to "ABCDEFGHIJ".
TEST_F(StringTest, DoReplaceTest0)
{
    static constexpr uint32_t string_length = 10;
    // Vectors own the buffers, so they are released even when a fatal assertion returns
    // early from the test body. Elements are value-initialized, which leaves the extra
    // trailing element as the terminating zero byte.
    std::vector<uint8_t> f_string(string_length + 1);
    std::vector<uint8_t> s_string(string_length + 1);

    for (uint32_t i = 0; i < string_length; i++) {
        f_string[i] = static_cast<uint8_t>('A' + i);
        s_string[i] = static_cast<uint8_t>('A' + i);
    }
    // The source string differs from the expected one only in its first character.
    f_string[0] = 'Z';

    String *f_string_s =
        String::CreateFromMUtf8(f_string.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *s_string_s =
        String::CreateFromMUtf8(s_string.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *t_string_s =
        String::DoReplace(f_string_s, 'Z', 'A', GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(t_string_s, s_string_s));
}
753
// Verifies String::FastSubString on a MUtf8 string: taking 5 characters starting at index 1
// of "ABCDEFGHIJ" must produce a string equal to "BCDEF".
TEST_F(StringTest, FastSubstringTest0)
{
    uint32_t string_length = 10;
    // Vectors own the buffers, so they are released even when a fatal assertion returns
    // early from the test body. Elements are value-initialized, which leaves the extra
    // trailing element as the terminating zero byte.
    std::vector<uint8_t> f_string(string_length + 1);
    for (uint32_t i = 0; i < string_length; i++) {
        f_string[i] = static_cast<uint8_t>('A' + i);
    }

    uint32_t sub_string_length = 5;
    uint32_t sub_string_start = 1;
    // Expected substring: the selected window of the source bytes plus a terminating zero.
    std::vector<uint8_t> s_string(f_string.begin() + sub_string_start,
                                  f_string.begin() + sub_string_start + sub_string_length);
    s_string.push_back(0);

    String *f_string_s =
        String::CreateFromMUtf8(f_string.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *s_string_s =
        String::CreateFromMUtf8(s_string.data(), GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    String *t_string_s = String::FastSubString(f_string_s, sub_string_start, sub_string_length, GetLanguageContext(),
                                               Runtime::GetCurrent()->GetPandaVM());
    ASSERT_TRUE(String::StringsAreEqual(t_string_s, s_string_s));
}
782
// Verifies String::ToCharArray for a MUtf8-created and a UTF-16-created string: the returned
// array must hold one uint16_t element per source code unit, in the same order.
TEST_F(StringTest, ToCharArray)
{
    // utf8
    std::vector<uint8_t> data {'a', 'b', 'c', 'd', 'e', 0};
    const uint32_t data_length = static_cast<uint32_t>(data.size() - 1);  // without the terminator
    String *utf8_string =
        String::CreateFromMUtf8(data.data(), data_length, GetLanguageContext(), Runtime::GetCurrent()->GetPandaVM());
    Array *new_array = utf8_string->ToCharArray(GetLanguageContext());
    ASSERT_NE(new_array, nullptr);
    // Pin the length first: otherwise an empty array would pass the element loop vacuously
    // and an over-long one would index past the end of the expected data.
    ASSERT_EQ(new_array->GetLength(), data_length);
    for (uint32_t i = 0; i < data_length; ++i) {
        ASSERT_EQ(data[i], new_array->Get<uint16_t>(i));
    }

    // utf16
    std::vector<uint16_t> data1 {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    const uint32_t data1_length = static_cast<uint32_t>(data1.size() - 1);  // without the terminator
    String *utf16_string = String::CreateFromUtf16(data1.data(), data1_length, GetLanguageContext(),
                                                   Runtime::GetCurrent()->GetPandaVM());
    Array *new_array1 = utf16_string->ToCharArray(GetLanguageContext());
    ASSERT_NE(new_array1, nullptr);
    ASSERT_EQ(new_array1->GetLength(), data1_length);
    for (uint32_t i = 0; i < data1_length; ++i) {
        ASSERT_EQ(data1[i], new_array1->Get<uint16_t>(i));
    }
}
802
// Verifies String::CreateNewStringFromChars: building a string from a window (offset 1,
// length 5) of a char array must equal a string created directly from the same code units.
TEST_F(StringTest, CreateNewStingFromCharArray)
{
    const LanguageContext lang_ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    // Source string and the char array derived from it.
    const std::vector<uint16_t> source_units {'f', 'g', 'h', 'a', 'b', 0x8ab, 0xdc, 'z', 0};
    String *source_string = String::CreateFromUtf16(source_units.data(), source_units.size() - 1, lang_ctx, vm);
    Array *source_chars = source_string->ToCharArray(lang_ctx);

    const uint32_t window_length = 5;
    const uint32_t window_offset = 1;

    // Expected content: the selected window of the source units plus a terminating zero.
    const auto window_begin = source_units.begin() + window_offset;
    std::vector<uint16_t> window_units(window_begin, window_begin + window_length);
    window_units.push_back(0);
    String *expected = String::CreateFromUtf16(window_units.data(), window_units.size() - 1, lang_ctx, vm);

    String *actual = String::CreateNewStringFromChars(window_offset, window_length, source_chars, lang_ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actual, expected));
}
825
// Verifies String::CreateNewStringFromBytes: building a string from a window (offset 1,
// length 5) of a byte array with a zero high byte must equal a UTF-16 string whose code
// units are those bytes widened to 16 bits.
TEST_F(StringTest, CreateNewStingFromByteArray)
{
    const LanguageContext lang_ctx = GetLanguageContext();
    auto *vm = Runtime::GetCurrent()->GetPandaVM();

    const std::vector<uint8_t> raw_bytes {'f', 'g', 'h', 'a', 'b', 0xab, 0xdc, 'z', 0};
    const uint32_t window_length = 5;
    const uint32_t window_offset = 1;
    const uint32_t high_byte = 0;

    // Expected code units: high_byte in the upper 8 bits, the source byte in the lower 8 bits.
    // No terminator is stored because the length is passed explicitly below.
    std::vector<uint16_t> expected_units;
    expected_units.reserve(window_length);
    for (uint32_t idx = 0; idx < window_length; ++idx) {
        const uint32_t low_byte = raw_bytes[idx + window_offset] & 0xFF;
        expected_units.push_back(static_cast<uint16_t>((high_byte << 8) + low_byte));
    }
    String *expected = String::CreateFromUtf16(expected_units.data(), window_length, lang_ctx, vm);

    // Copy every byte except the trailing terminator into a managed i8 array.
    Class *byte_array_class =
        Runtime::GetCurrent()->GetClassLinker()->GetExtension(lang_ctx)->GetClassRoot(panda::ClassRoot::ARRAY_I8);
    const size_t payload_size = raw_bytes.size() - 1;
    Array *managed_bytes = Array::Create(byte_array_class, payload_size);
    for (uint32_t idx = 0; idx < payload_size; ++idx) {
        managed_bytes->Set<uint8_t>(idx, raw_bytes[idx]);
    }

    String *actual =
        String::CreateNewStringFromBytes(window_offset, window_length, high_byte, managed_bytes, lang_ctx, vm);

    ASSERT_TRUE(String::StringsAreEqual(actual, expected));
}
854
855 } // namespace panda::coretypes::test
856