1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef STRING_TABLE_BASE_TEST_H
17 #define STRING_TABLE_BASE_TEST_H
18
19 #include "gtest/gtest.h"
20 #include "runtime/include/coretypes/string.h"
21 #include "runtime/include/runtime.h"
22 #include "runtime/include/thread.h"
23 #include "runtime/include/gc_task.h"
24 #include "runtime/include/panda_vm.h"
25 #include "runtime/handle_base-inl.h"
26 #include "runtime/mem/refstorage/global_object_storage.h"
27 #include "runtime/include/thread_scopes.h"
28 #include "test_utils.h"
29
30 #include "libpandafile/file.h"
31 #include "libpandafile/file_item_container.h"
32 #include "libpandafile/file_writer.h"
33
34 #include <limits>
35
36 namespace ark::mem::test {
37 class StringTableTest : public testing::TestWithParam<const char *> {
38 public:
39 static constexpr size_t G1_YOUNG_TEST_SIZE = 1_MB;
40 static constexpr size_t NON_G1_YOUNG_TEST_SIZE = 18_MB;
41 static constexpr size_t TEST_HEAP_SIZE = 36_MB;
StringTableTest()42 StringTableTest()
43 {
44 const std::string gcType = GetParam();
45 RuntimeOptions options;
46 options.SetShouldLoadBootPandaFiles(false);
47 options.SetShouldInitializeIntrinsics(false);
48 options.SetExplicitConcurrentGcEnabled(false);
49 if (gcType == "g1-gc") {
50 options.SetYoungSpaceSize(G1_YOUNG_TEST_SIZE);
51 } else {
52 options.SetYoungSpaceSize(NON_G1_YOUNG_TEST_SIZE);
53 }
54
55 options.SetHeapSizeLimit(TEST_HEAP_SIZE);
56 options.SetGcType(gcType);
57 options.SetCompilerEnableJit(false);
58 Runtime::Create(options);
59
60 thread_ = ark::MTManagedThread::GetCurrent();
61 }
62
63 NO_COPY_SEMANTIC(StringTableTest);
64 NO_MOVE_SEMANTIC(StringTableTest);
65
~StringTableTest()66 ~StringTableTest() override
67 {
68 Runtime::Destroy();
69 }
70
AllocUtf8String(std::vector<uint8_t> data,bool isMovable=true)71 static coretypes::String *AllocUtf8String(std::vector<uint8_t> data, bool isMovable = true)
72 {
73 LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
74 return coretypes::String::CreateFromMUtf8(data.data(), utf::MUtf8ToUtf16Size(data.data()), ctx,
75 Runtime::GetCurrent()->GetPandaVM(), isMovable);
76 }
77
RunStringTableTests()78 void RunStringTableTests()
79 {
80 EmptyTable();
81 InternCompressedUtf8AndString();
82 InternUncompressedUtf8AndString();
83 InternTheSameUtf16String();
84 InternManyStrings();
85 SweepObjectInTable();
86 SweepNonMovableObjectInTable();
87 SweepHumongousObjectInTable();
88 InternTooLongString();
89 }
90
EmptyTable()91 void EmptyTable()
92 {
93 ScopedManagedCodeThread s(thread_);
94 auto table = StringTable();
95 ASSERT_EQ(table.Size(), 0);
96 }
97
InternCompressedUtf8AndString()98 void InternCompressedUtf8AndString()
99 {
100 ScopedManagedCodeThread s(thread_);
101 auto table = StringTable();
102 std::vector<uint8_t> data {0x01, 0x02, 0x03, 0x00}; // NOLINT(readability-magic-numbers)
103 auto *string = AllocUtf8String(data);
104 auto *internedStr1 =
105 table.GetOrInternString(data.data(), data.size() - 1,
106 Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY));
107 auto *internedStr2 = table.GetOrInternString(
108 string, Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY));
109 ASSERT_EQ(internedStr1, internedStr2);
110 ASSERT_EQ(table.Size(), 1);
111 }
112
InternUncompressedUtf8AndString()113 void InternUncompressedUtf8AndString()
114 {
115 ScopedManagedCodeThread s(thread_);
116 auto table = StringTable();
117 std::vector<uint8_t> data {0xc2, 0xa7, 0x34, 0x00}; // NOLINT(readability-magic-numbers)
118 auto *string = AllocUtf8String(data);
119 auto *internedStr1 = table.GetOrInternString(
120 data.data(), 2, Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY));
121 auto *internedStr2 = table.GetOrInternString(
122 string, Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY));
123 ASSERT_EQ(internedStr1, internedStr2);
124 ASSERT_EQ(table.Size(), 1);
125 }
126
InternTheSameUtf16String()127 void InternTheSameUtf16String()
128 {
129 ScopedManagedCodeThread s(thread_);
130 auto table = StringTable();
131 std::vector<uint16_t> data {0xffc3, 0x33, 0x00}; // NOLINT(readability-magic-numbers)
132
133 LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
134 auto *firstString =
135 coretypes::String::CreateFromUtf16(data.data(), data.size(), ctx, Runtime::GetCurrent()->GetPandaVM());
136 auto *secondString =
137 coretypes::String::CreateFromUtf16(data.data(), data.size(), ctx, Runtime::GetCurrent()->GetPandaVM());
138
139 auto *internedStr1 = table.GetOrInternString(firstString, ctx);
140 auto *internedStr2 = table.GetOrInternString(secondString, ctx);
141 ASSERT_EQ(internedStr1, internedStr2);
142 ASSERT_EQ(table.Size(), 1);
143 }
144
InternManyStrings()145 void InternManyStrings()
146 {
147 ScopedManagedCodeThread s(thread_);
148 static constexpr size_t ITERATIONS = 50;
149 auto table = StringTable();
150 std::vector<uint8_t> data {0x00};
151 const unsigned numberOfLetters = 25;
152
153 LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
154 for (size_t i = 0; i < ITERATIONS; i++) {
155 data.insert(data.begin(), (('a' + i) % numberOfLetters) + 1);
156 [[maybe_unused]] auto *firstPointer = table.GetOrInternString(AllocUtf8String(data), ctx);
157 [[maybe_unused]] auto *secondPointer =
158 table.GetOrInternString(data.data(), utf::MUtf8ToUtf16Size(data.data()), ctx);
159 auto *thirdPointer = table.GetOrInternString(AllocUtf8String(data), ctx);
160 ASSERT_EQ(firstPointer, secondPointer);
161 ASSERT_EQ(secondPointer, thirdPointer);
162 }
163 ASSERT_EQ(table.Size(), ITERATIONS);
164 }
165
SweepObjectInTable()166 void SweepObjectInTable()
167 {
168 ScopedManagedCodeThread s(thread_);
169 auto table = thread_->GetVM()->GetStringTable();
170 auto tableInitSize = table->Size();
171 std::vector<uint8_t> data1 {0x01, 0x00};
172 std::vector<uint8_t> data2 {0x02, 0x00};
173 std::vector<uint8_t> data3 {0x03, 0x00};
174 const unsigned expectedTableSize = 2;
175
176 auto storage = thread_->GetVM()->GetGlobalObjectStorage();
177
178 auto *s1 = AllocUtf8String(data1);
179 auto ref1 = storage->Add(s1, Reference::ObjectType::GLOBAL);
180 auto *s2 = AllocUtf8String(data2);
181 auto ref2 = storage->Add(s2, Reference::ObjectType::GLOBAL);
182 auto *s3 = AllocUtf8String(data3);
183 auto ref3 = storage->Add(s3, Reference::ObjectType::GLOBAL);
184
185 auto pandaClassContext = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
186 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref1)), pandaClassContext);
187 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref2)), pandaClassContext);
188 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref3)), pandaClassContext);
189
190 storage->Remove(ref2);
191
192 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
193 // Collect all heap for EXPLICIT_CAUSE
194 ASSERT_EQ(table->Size(), tableInitSize + expectedTableSize);
195
196 storage->Remove(ref1);
197 storage->Remove(ref3);
198 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
199 // Collect all heap for EXPLICIT_CAUSE
200 ASSERT_EQ(table->Size(), tableInitSize);
201 }
202
SweepNonMovableObjectInTable()203 void SweepNonMovableObjectInTable()
204 {
205 ScopedManagedCodeThread s(thread_);
206 auto table = thread_->GetVM()->GetStringTable();
207 auto tableInitSize = table->Size();
208 std::vector<uint8_t> data1 {0x01, 0x00};
209 std::vector<uint8_t> data2 {0x02, 0x00};
210 std::vector<uint8_t> data3 {0x03, 0x00};
211 const unsigned expectedTableSize = 2;
212
213 auto storage = thread_->GetVM()->GetGlobalObjectStorage();
214
215 auto *s1 = AllocUtf8String(data1, false);
216 ASSERT_NE(s1, nullptr);
217 auto ref1 = storage->Add(s1, Reference::ObjectType::GLOBAL);
218 auto *s2 = AllocUtf8String(data2, false);
219 ASSERT_NE(s2, nullptr);
220 auto ref2 = storage->Add(s2, Reference::ObjectType::GLOBAL);
221 auto *s3 = AllocUtf8String(data3, false);
222 ASSERT_NE(s3, nullptr);
223 auto ref3 = storage->Add(s3, Reference::ObjectType::GLOBAL);
224
225 auto pandaClassContext = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
226 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref1)), pandaClassContext);
227 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref2)), pandaClassContext);
228 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref3)), pandaClassContext);
229
230 storage->Remove(ref2);
231
232 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
233 // Collect all heap for EXPLICIT_CAUSE
234 ASSERT_EQ(table->Size(), tableInitSize + expectedTableSize);
235
236 storage->Remove(ref1);
237 storage->Remove(ref3);
238 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
239 // Collect all heap for EXPLICIT_CAUSE
240 ASSERT_EQ(table->Size(), tableInitSize);
241 }
242
SweepHumongousObjectInTable()243 void SweepHumongousObjectInTable()
244 {
245 ScopedManagedCodeThread s(thread_);
246 auto table = thread_->GetVM()->GetStringTable();
247 static constexpr size_t HUMONGOUS_STRING_SIZE = 1_MB;
248 std::vector<uint8_t> data1 {0x01};
249 std::vector<uint8_t> data2 {0x02};
250 std::vector<uint8_t> data3 {0x03};
251 for (size_t i = 0; i < HUMONGOUS_STRING_SIZE; i++) {
252 data1.push_back(0x05);
253 data2.push_back(0x05);
254 data3.push_back(0x05);
255 }
256 data1.push_back(0x00);
257 data2.push_back(0x00);
258 data3.push_back(0x00);
259 auto tableInitSize = table->Size();
260 const unsigned expectedTableSize = 2;
261
262 auto storage = thread_->GetVM()->GetGlobalObjectStorage();
263
264 auto *s1 = AllocUtf8String(data1);
265 ASSERT_NE(s1, nullptr);
266 auto ref1 = storage->Add(s1, Reference::ObjectType::GLOBAL);
267 auto *s2 = AllocUtf8String(data2);
268 ASSERT_NE(s2, nullptr);
269 auto ref2 = storage->Add(s2, Reference::ObjectType::GLOBAL);
270 auto *s3 = AllocUtf8String(data3);
271 ASSERT_NE(s3, nullptr);
272 auto ref3 = storage->Add(s3, Reference::ObjectType::GLOBAL);
273
274 auto pandaClassContext = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
275 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref1)), pandaClassContext);
276 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref2)), pandaClassContext);
277 table->GetOrInternString(reinterpret_cast<coretypes::String *>(storage->Get(ref3)), pandaClassContext);
278
279 storage->Remove(ref2);
280
281 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
282 // Collect all heap for EXPLICIT_CAUSE
283 ASSERT_EQ(table->Size(), tableInitSize + expectedTableSize);
284
285 storage->Remove(ref1);
286 storage->Remove(ref3);
287 thread_->GetVM()->GetGC()->WaitForGCInManaged(ark::GCTask(ark::GCTaskCause::EXPLICIT_CAUSE));
288 // Collect all heap for EXPLICIT_CAUSE
289 ASSERT_EQ(table->Size(), tableInitSize);
290 }
291
InternTooLongString()292 void InternTooLongString()
293 {
294 ScopedManagedCodeThread s(thread_);
295 auto table = StringTable();
296 auto *runtime = Runtime::GetCurrent();
297 auto pandaClassContext = runtime->GetLanguageContext(panda_file::SourceLang::PANDA_ASSEMBLY);
298
299 auto *thread = ManagedThread::GetCurrent();
300
301 [[maybe_unused]] HandleScope<ObjectHeader *> scope(thread);
302
303 PandaVector<VMHandle<ObjectHeader>> objects;
304 constexpr size_t STRING_DATA_SIZE = 20000U;
305 constexpr uint8_t START_VALUE_IN_STRING_DATA = 0x30;
306 std::vector<uint8_t> stringData(STRING_DATA_SIZE, START_VALUE_IN_STRING_DATA);
307 stringData.push_back(0x00);
308
309 auto fillHeap = [&stringData, &thread, &objects](bool isMovable) {
310 while (true) {
311 auto *obj = AllocUtf8String(stringData, isMovable);
312 if (obj == nullptr) {
313 thread->ClearException();
314 break;
315 }
316 objects.emplace_back(thread, obj);
317 }
318 };
319
320 {
321 fillHeap(true);
322 auto *res = table.GetOrInternString(stringData.data(), stringData.size() - 1, pandaClassContext);
323 ASSERT_EQ(res, nullptr);
324 ManagedThread::GetCurrent()->ClearException();
325 }
326
327 {
328 panda_file::ItemContainer container;
329 panda_file::MemoryWriter writer;
330
331 auto *stringItem = container.GetOrCreateStringItem(reinterpret_cast<char *>(stringData.data()));
332
333 container.Write(&writer);
334 auto data = writer.GetData();
335
336 auto id = panda_file::File::EntityId(stringItem->GetOffset());
337
338 os::mem::ConstBytePtr ptr(reinterpret_cast<std::byte *>(data.data()), data.size(),
339 [](std::byte *, size_t) noexcept {});
340
341 auto pf = panda_file::File::OpenFromMemory(std::move(ptr));
342
343 fillHeap(false);
344 auto *res = table.GetOrInternInternalString(*pf, id, pandaClassContext);
345 ASSERT_EQ(res, nullptr);
346 ManagedThread::GetCurrent()->ClearException();
347 }
348 }
349
350 protected:
351 ark::MTManagedThread *thread_; // NOLINT(misc-non-private-member-variables-in-classes)
352 };
353
// Runs all StringTable scenarios against the runtime the fixture built for the current GC parameter.
TEST_P(StringTableTest, StringTableGCsTest)
{
    this->RunStringTableTests();
}
358
359 INSTANTIATE_TEST_SUITE_P(StringTableTestOnDiffGCs, StringTableTest, ::testing::ValuesIn(TESTED_GC));
360 } // namespace ark::mem::test
361
362 #endif // STRING_TABLE_BASE_TEST_H
363