1 /**
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "runtime/string_table.h"
17
18 #include "runtime/include/runtime.h"
19 #include "runtime/mem/object_helpers.h"
20
21 namespace ark {
22
GetOrInternString(const uint8_t * mutf8Data,uint32_t utf16Length,const LanguageContext & ctx)23 coretypes::String *StringTable::GetOrInternString(const uint8_t *mutf8Data, uint32_t utf16Length,
24 const LanguageContext &ctx)
25 {
26 bool canBeCompressed = coretypes::String::CanBeCompressedMUtf8(mutf8Data);
27 auto *str = internalTable_.GetString(mutf8Data, utf16Length, canBeCompressed, ctx);
28 if (str == nullptr) {
29 str = table_.GetOrInternString(mutf8Data, utf16Length, canBeCompressed, ctx);
30 }
31 return str;
32 }
33
GetOrInternString(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx)34 coretypes::String *StringTable::GetOrInternString(const uint16_t *utf16Data, uint32_t utf16Length,
35 const LanguageContext &ctx)
36 {
37 auto *str = internalTable_.GetString(utf16Data, utf16Length, ctx);
38 if (str == nullptr) {
39 str = table_.GetOrInternString(utf16Data, utf16Length, ctx);
40 }
41 return str;
42 }
43
GetOrInternString(coretypes::String * string,const LanguageContext & ctx)44 coretypes::String *StringTable::GetOrInternString(coretypes::String *string, const LanguageContext &ctx)
45 {
46 auto *str = internalTable_.GetString(string, ctx);
47 if (str == nullptr) {
48 str = table_.GetOrInternString(string, ctx);
49 }
50 return str;
51 }
52
GetOrInternInternalString(const panda_file::File & pf,panda_file::File::EntityId id,const LanguageContext & ctx)53 coretypes::String *StringTable::GetOrInternInternalString(const panda_file::File &pf, panda_file::File::EntityId id,
54 const LanguageContext &ctx)
55 {
56 auto data = pf.GetStringData(id);
57
58 coretypes::String *str = table_.GetString(data.data, data.utf16Length, data.isAscii, ctx);
59 if (str != nullptr) {
60 table_.PreBarrierOnGet(str);
61 return str;
62 }
63 return internalTable_.GetOrInternString(pf, id, ctx);
64 }
65
Sweep(const GCObjectVisitor & gcObjectVisitor)66 void StringTable::Sweep(const GCObjectVisitor &gcObjectVisitor)
67 {
68 table_.Sweep(gcObjectVisitor);
69 }
70
UpdateMoved(const GCRootUpdater & gcRootUpdater)71 bool StringTable::UpdateMoved(const GCRootUpdater &gcRootUpdater)
72 {
73 return table_.UpdateMoved(gcRootUpdater);
74 }
75
Size()76 size_t StringTable::Size()
77 {
78 return internalTable_.Size() + table_.Size();
79 }
80
VisitStrings(const StringVisitor & visitor)81 void StringTable::Table::VisitStrings(const StringVisitor &visitor)
82 {
83 os::memory::ReadLockHolder holder(tableLock_);
84 for (auto entry : table_) {
85 visitor(entry.second);
86 }
87 }
88
GetString(const uint8_t * utf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx)89 coretypes::String *StringTable::Table::GetString(const uint8_t *utf8Data, uint32_t utf16Length, bool canBeCompressed,
90 [[maybe_unused]] const LanguageContext &ctx)
91 {
92 uint32_t hashCode = coretypes::String::ComputeHashcodeMutf8(utf8Data, utf16Length, canBeCompressed);
93 os::memory::ReadLockHolder holder(tableLock_);
94 for (auto it = table_.find(hashCode); it != table_.end(); it++) {
95 auto foundString = it->second;
96 if (coretypes::String::StringsAreEqualMUtf8(foundString, utf8Data, utf16Length, canBeCompressed)) {
97 return foundString;
98 }
99 }
100 return nullptr;
101 }
102
GetString(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx)103 coretypes::String *StringTable::Table::GetString(const uint16_t *utf16Data, uint32_t utf16Length,
104 [[maybe_unused]] const LanguageContext &ctx)
105 {
106 uint32_t hashCode = coretypes::String::ComputeHashcodeUtf16(const_cast<uint16_t *>(utf16Data), utf16Length);
107 os::memory::ReadLockHolder holder(tableLock_);
108 for (auto it = table_.find(hashCode); it != table_.end(); it++) {
109 auto foundString = it->second;
110 if (coretypes::String::StringsAreEqualUtf16(foundString, utf16Data, utf16Length)) {
111 return foundString;
112 }
113 }
114 return nullptr;
115 }
116
GetString(coretypes::String * string,const LanguageContext & ctx)117 coretypes::String *StringTable::Table::GetString(coretypes::String *string, [[maybe_unused]] const LanguageContext &ctx)
118 {
119 ASSERT(string != nullptr);
120 os::memory::ReadLockHolder holder(tableLock_);
121 auto hash = string->GetHashcode();
122 for (auto it = table_.find(hash); it != table_.end(); it++) {
123 auto foundString = it->second;
124 if (coretypes::String::StringsAreEqual(foundString, string)) {
125 return foundString;
126 }
127 }
128 return nullptr;
129 }
130
ForceInternString(coretypes::String * string,const LanguageContext & ctx)131 void StringTable::Table::ForceInternString(coretypes::String *string, [[maybe_unused]] const LanguageContext &ctx)
132 {
133 os::memory::WriteLockHolder holder(tableLock_);
134 table_.insert(std::pair<uint32_t, coretypes::String *>(string->GetHashcode(), string));
135 }
136
InternString(coretypes::String * string,const LanguageContext & ctx)137 coretypes::String *StringTable::Table::InternString(coretypes::String *string,
138 [[maybe_unused]] const LanguageContext &ctx)
139 {
140 ASSERT(string != nullptr);
141 uint32_t hashCode = string->GetHashcode();
142 os::memory::WriteLockHolder holder(tableLock_);
143 // Check string is not present before actually creating and inserting
144 for (auto it = table_.find(hashCode); it != table_.end(); it++) {
145 auto foundString = it->second;
146 if (coretypes::String::StringsAreEqual(foundString, string)) {
147 return foundString;
148 }
149 }
150 table_.insert(std::pair<uint32_t, coretypes::String *>(hashCode, string));
151 return string;
152 }
153
PreBarrierOnGet(coretypes::String * str)154 void StringTable::Table::PreBarrierOnGet(coretypes::String *str)
155 {
156 // Need pre barrier if string exists in string table, because this string can be got from the
157 // string table (like phoenix) and write to a field during concurrent phase and GC does not see it on Remark
158 ASSERT_MANAGED_CODE();
159 auto *preWrb = Thread::GetCurrent()->GetPreWrbEntrypoint();
160 if (preWrb != nullptr) {
161 reinterpret_cast<mem::ObjRefProcessFunc>(preWrb)(str);
162 }
163 }
164
GetOrInternString(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx)165 coretypes::String *StringTable::Table::GetOrInternString(const uint8_t *mutf8Data, uint32_t utf16Length,
166 bool canBeCompressed, const LanguageContext &ctx)
167 {
168 coretypes::String *result = GetString(mutf8Data, utf16Length, canBeCompressed, ctx);
169 if (result != nullptr) {
170 PreBarrierOnGet(result);
171 return result;
172 }
173
174 // Even if this string is not inserted, it should get removed during GC
175 result =
176 coretypes::String::CreateFromMUtf8(mutf8Data, utf16Length, canBeCompressed, ctx, Thread::GetCurrent()->GetVM());
177 if (UNLIKELY(result == nullptr)) {
178 return nullptr;
179 }
180 result = InternString(result, ctx);
181
182 return result;
183 }
184
GetOrInternString(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx)185 coretypes::String *StringTable::Table::GetOrInternString(const uint16_t *utf16Data, uint32_t utf16Length,
186 const LanguageContext &ctx)
187 {
188 coretypes::String *result = GetString(utf16Data, utf16Length, ctx);
189 if (result != nullptr) {
190 PreBarrierOnGet(result);
191 return result;
192 }
193
194 // Even if this string is not inserted, it should get removed during GC
195 result = coretypes::String::CreateFromUtf16(utf16Data, utf16Length, ctx, Thread::GetCurrent()->GetVM());
196 if (UNLIKELY(result == nullptr)) {
197 return nullptr;
198 }
199
200 result = InternString(result, ctx);
201
202 return result;
203 }
204
GetOrInternString(coretypes::String * string,const LanguageContext & ctx)205 coretypes::String *StringTable::Table::GetOrInternString(coretypes::String *string, const LanguageContext &ctx)
206 {
207 coretypes::String *result = GetString(string, ctx);
208 if (result != nullptr) {
209 PreBarrierOnGet(result);
210 return result;
211 }
212 result = InternString(string, ctx);
213 return result;
214 }
215
UpdateMoved(const GCRootUpdater & gcRootUpdater)216 bool StringTable::Table::UpdateMoved(const GCRootUpdater &gcRootUpdater)
217 {
218 os::memory::WriteLockHolder holder(tableLock_);
219 LOG(DEBUG, GC) << "=== StringTable Update moved. BEGIN ===";
220 LOG(DEBUG, GC) << "Iterate over: " << table_.size() << " elements in string table";
221 bool updated = false;
222 for (auto it = table_.begin(), end = table_.end(); it != end;) {
223 ObjectHeader *object = it->second;
224 if (gcRootUpdater(&object)) {
225 it->second = static_cast<coretypes::String *>(object);
226 LOG(DEBUG, GC) << "StringTable: forwarded " << std::hex << object;
227 updated = true;
228 }
229 ++it;
230 }
231 LOG(DEBUG, GC) << "=== StringTable Update moved. END ===";
232 return updated;
233 }
234
235 // NOTE(alovkov): make parallel
Sweep(const GCObjectVisitor & gcObjectVisitor)236 void StringTable::Table::Sweep(const GCObjectVisitor &gcObjectVisitor)
237 {
238 os::memory::WriteLockHolder holder(tableLock_);
239 LOG(DEBUG, GC) << "=== StringTable Sweep. BEGIN ===";
240 LOG(DEBUG, GC) << "StringTable iterate over: " << table_.size() << " elements in string table";
241 for (auto it = table_.begin(), end = table_.end(); it != end;) {
242 auto *object = it->second;
243 if (gcObjectVisitor(object) == ObjectStatus::ALIVE_OBJECT) {
244 // All references in the string table must be updated before.
245 ASSERT(!object->IsForwarded());
246 ++it;
247 } else if (gcObjectVisitor(object) == ObjectStatus::DEAD_OBJECT) {
248 LOG(DEBUG, GC) << "StringTable: delete string " << std::hex << object
249 << ", val = " << ConvertToString(object);
250 table_.erase(it++);
251 }
252 }
253 LOG(DEBUG, GC) << "StringTable size after sweep = " << table_.size();
254 LOG(DEBUG, GC) << "=== StringTable Sweep. END ===";
255 }
256
Size()257 size_t StringTable::Table::Size()
258 {
259 os::memory::ReadLockHolder holder(tableLock_);
260 return table_.size();
261 }
262
GetOrInternString(const uint8_t * mutf8Data,uint32_t utf16Length,bool canBeCompressed,const LanguageContext & ctx)263 coretypes::String *StringTable::InternalTable::GetOrInternString(const uint8_t *mutf8Data, uint32_t utf16Length,
264 bool canBeCompressed, const LanguageContext &ctx)
265 {
266 coretypes::String *result = GetString(mutf8Data, utf16Length, canBeCompressed, ctx);
267 if (result != nullptr) {
268 return result;
269 }
270
271 result = coretypes::String::CreateFromMUtf8(mutf8Data, utf16Length, canBeCompressed, ctx,
272 Thread::GetCurrent()->GetVM(), false);
273 if (UNLIKELY(result == nullptr)) {
274 return nullptr;
275 }
276 result = InternStringNonMovable(result, ctx);
277 return result;
278 }
279
GetOrInternString(const uint16_t * utf16Data,uint32_t utf16Length,const LanguageContext & ctx)280 coretypes::String *StringTable::InternalTable::GetOrInternString(const uint16_t *utf16Data, uint32_t utf16Length,
281 const LanguageContext &ctx)
282 {
283 coretypes::String *result = GetString(utf16Data, utf16Length, ctx);
284 if (result != nullptr) {
285 return result;
286 }
287
288 result = coretypes::String::CreateFromUtf16(utf16Data, utf16Length, ctx, Thread::GetCurrent()->GetVM(), false);
289 if (UNLIKELY(result == nullptr)) {
290 return nullptr;
291 }
292 result = InternStringNonMovable(result, ctx);
293 return result;
294 }
295
GetOrInternString(coretypes::String * string,const LanguageContext & ctx)296 coretypes::String *StringTable::InternalTable::GetOrInternString(coretypes::String *string, const LanguageContext &ctx)
297 {
298 coretypes::String *result = GetString(string, ctx);
299 if (result != nullptr) {
300 return result;
301 }
302 result = InternString(string, ctx);
303 return result;
304 }
305
GetOrInternString(const panda_file::File & pf,panda_file::File::EntityId id,const LanguageContext & ctx)306 coretypes::String *StringTable::InternalTable::GetOrInternString(const panda_file::File &pf,
307 panda_file::File::EntityId id,
308 const LanguageContext &ctx)
309 {
310 auto data = pf.GetStringData(id);
311 coretypes::String *result = GetString(data.data, data.utf16Length, data.isAscii, ctx);
312 if (result != nullptr) {
313 return result;
314 }
315 result = coretypes::String::CreateFromMUtf8(data.data, data.utf16Length, data.isAscii, ctx,
316 Thread::GetCurrent()->GetVM(), false);
317 if (UNLIKELY(result == nullptr)) {
318 return nullptr;
319 }
320
321 result = InternStringNonMovable(result, ctx);
322
323 // Update cache.
324 os::memory::WriteLockHolder lock(mapsLock_);
325 auto it = maps_.find(&pf);
326 if (it != maps_.end()) {
327 (it->second)[id] = result;
328 } else {
329 PandaUnorderedMap<panda_file::File::EntityId, coretypes::String *, EntityIdEqual> map;
330 map[id] = result;
331 maps_[&pf] = std::move(map);
332 }
333 return result;
334 }
335
GetStringFast(const panda_file::File & pf,panda_file::File::EntityId id)336 coretypes::String *StringTable::InternalTable::GetStringFast(const panda_file::File &pf, panda_file::File::EntityId id)
337 {
338 os::memory::ReadLockHolder lock(mapsLock_);
339 auto it = maps_.find(&pf);
340 if (it != maps_.end()) {
341 auto idIt = it->second.find(id);
342 if (idIt != it->second.end()) {
343 return idIt->second;
344 }
345 }
346 return nullptr;
347 }
348
VisitRoots(const StringVisitor & visitor,mem::VisitGCRootFlags flags)349 void StringTable::InternalTable::VisitRoots(const StringVisitor &visitor, mem::VisitGCRootFlags flags)
350 {
351 ASSERT(BitCount(flags & (mem::VisitGCRootFlags::ACCESS_ROOT_ALL | mem::VisitGCRootFlags::ACCESS_ROOT_ONLY_NEW)) ==
352 1);
353
354 ASSERT(BitCount(flags & (mem::VisitGCRootFlags::START_RECORDING_NEW_ROOT |
355 mem::VisitGCRootFlags::END_RECORDING_NEW_ROOT)) <= 1);
356 // need to set flags before we iterate, because concurrent allocation should be in proper table
357 if ((flags & mem::VisitGCRootFlags::START_RECORDING_NEW_ROOT) != 0) {
358 os::memory::WriteLockHolder holder(tableLock_);
359 recordNewString_ = true;
360 } else if ((flags & mem::VisitGCRootFlags::END_RECORDING_NEW_ROOT) != 0) {
361 os::memory::WriteLockHolder holder(tableLock_);
362 recordNewString_ = false;
363 }
364
365 if ((flags & mem::VisitGCRootFlags::ACCESS_ROOT_ALL) != 0) {
366 os::memory::ReadLockHolder lock(tableLock_);
367 for (const auto &v : table_) {
368 visitor(v.second);
369 }
370 } else if ((flags & mem::VisitGCRootFlags::ACCESS_ROOT_ONLY_NEW) != 0) {
371 os::memory::ReadLockHolder lock(tableLock_);
372 for (const auto str : newStringTable_) {
373 visitor(str);
374 }
375 } else {
376 LOG(FATAL, RUNTIME) << "Unknown VisitGCRootFlags: " << static_cast<uint32_t>(flags);
377 }
378 if ((flags & mem::VisitGCRootFlags::END_RECORDING_NEW_ROOT) != 0) {
379 os::memory::WriteLockHolder holder(tableLock_);
380 newStringTable_.clear();
381 }
382 }
383
InternStringNonMovable(coretypes::String * string,const LanguageContext & ctx)384 coretypes::String *StringTable::InternalTable::InternStringNonMovable(coretypes::String *string,
385 const LanguageContext &ctx)
386 {
387 auto *result = InternString(string, ctx);
388 os::memory::WriteLockHolder holder(tableLock_);
389 if (recordNewString_) {
390 newStringTable_.push_back(result);
391 }
392 return result;
393 }
394
395 } // namespace ark
396