1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "update_remset_thread.h"
17 #include "libpandabase/utils/logger.h"
18 #include "runtime/include/runtime.h"
19 #include "runtime/include/thread.h"
20 #include "runtime/include/managed_thread.h"
21 #include "runtime/include/panda_vm.h"
22 #include "runtime/mem/rem_set-inl.h"
23 #include "runtime/mem/gc/gc.h"
24 #include "runtime/mem/gc/gc_phase.h"
25
26 namespace panda::mem {
27
28 static constexpr size_t PREALLOCATED_SET_SIZE = 256;
29
30 template <class LanguageConfig>
UpdateRemsetThread(GC * gc,PandaVM * vm,GCG1BarrierSet::ThreadLocalCardQueues * queue,os::memory::Mutex * queue_lock,size_t region_size,bool update_concurrent,size_t min_concurrent_cards_to_process,CardTable * card_table)31 UpdateRemsetThread<LanguageConfig>::UpdateRemsetThread(GC *gc, PandaVM *vm,
32 GCG1BarrierSet::ThreadLocalCardQueues *queue,
33 os::memory::Mutex *queue_lock, size_t region_size,
34 bool update_concurrent, size_t min_concurrent_cards_to_process,
35 CardTable *card_table)
36 : gc_(gc),
37 vm_(vm),
38 card_table_(card_table),
39 queue_(queue),
40 queue_lock_(queue_lock),
41 update_concurrent_(update_concurrent),
42 region_size_bits_(panda::helpers::math::GetIntLog2(region_size)),
43 min_concurrent_cards_to_process_(min_concurrent_cards_to_process)
44
45 {
46 cards_.reserve(PREALLOCATED_SET_SIZE);
47 }
48
49 template <class LanguageConfig>
CreateThread(InternalAllocatorPtr internal_allocator)50 void UpdateRemsetThread<LanguageConfig>::CreateThread(InternalAllocatorPtr internal_allocator)
51 {
52 ASSERT(internal_allocator != nullptr);
53 if (update_concurrent_) {
54 LOG(DEBUG, GC) << "Start creating UpdateRemsetThread";
55
56 os::memory::LockHolder holder(loop_lock);
57 stop_thread_ = false;
58 // dont reset pause_thread_ here WaitUntilTasksEnd does it, and we can reset pause_thread_ by accident here,
59 // because we set it without lock
60 ASSERT(update_thread_ == nullptr);
61 update_thread_ = internal_allocator->New<std::thread>(&UpdateRemsetThread::ThreadLoop, this);
62 int res = os::thread::SetThreadName(update_thread_->native_handle(), "UpdateRemset");
63 if (res != 0) {
64 LOG(ERROR, RUNTIME) << "Failed to set a name for the UpdateRemset thread";
65 }
66 }
67 }
68
69 template <class LanguageConfig>
DestroyThread()70 void UpdateRemsetThread<LanguageConfig>::DestroyThread()
71 {
72 if (update_concurrent_) {
73 stop_thread_ = true;
74 LOG(DEBUG, GC) << "Starting destroy UpdateRemsetThread";
75 {
76 os::memory::LockHolder holder(loop_lock);
77 thread_cond_var_.SignalAll(); // wake up update_thread & pause method
78 }
79 ASSERT(update_thread_ != nullptr);
80 update_thread_->join();
81 auto allocator = Runtime::GetCurrent()->GetInternalAllocator();
82 ASSERT(allocator != nullptr);
83 allocator->Delete(update_thread_);
84 update_thread_ = nullptr;
85 LOG(DEBUG, GC) << "UpdateRemsetThread was destroyed";
86 }
87 }
88
89 template <class LanguageConfig>
StartThread()90 void UpdateRemsetThread<LanguageConfig>::StartThread()
91 {
92 if (update_concurrent_) {
93 LOG(DEBUG, GC) << "Start UpdateRemsetThread";
94 {
95 os::memory::LockHolder holder(loop_lock);
96 ASSERT(update_thread_ != nullptr);
97 pause_thread_ = false;
98 thread_cond_var_.Signal();
99 }
100 }
101 }
102
103 // TODO(alovkov): GC-thread can help to update-thread to process all cards concurrently
104 template <class LanguageConfig>
WaitUntilTasksEnd()105 void UpdateRemsetThread<LanguageConfig>::WaitUntilTasksEnd()
106 {
107 pause_thread_ = true; // either ThreadLoop should set it to false, or this function if we don't have a thread
108 if (update_concurrent_ && !stop_thread_) {
109 LOG(DEBUG, GC) << "Starting pause UpdateRemsetThread";
110
111 os::memory::LockHolder holder(loop_lock);
112 while (pause_thread_) {
113 // runtime is destroying, handle all refs anyway for now
114 if (stop_thread_ || update_thread_ == nullptr) {
115 ProcessAllCards(); // Process all cards inside gc
116 pause_thread_ = false;
117 break;
118 }
119 thread_cond_var_.Signal();
120 thread_cond_var_.Wait(&loop_lock);
121 }
122 ASSERT(GetQueueSize() == 0);
123 } else {
124 os::memory::LockHolder holder(loop_lock);
125 // we will handle all remsets even when thread is stopped (we are trying to destroy Runtime, but it's the last
126 // GC), try to eliminate it in the future for faster shutdown
127 ProcessAllCards(); // Process all cards inside gc
128 pause_thread_ = false;
129 }
130 thread_cond_var_.Signal();
131 stats_.PrintStats();
132 stats_.Reset();
133 ASSERT(GetQueueSize() == 0);
134 ASSERT(!pause_thread_);
135 }
136
137 template <class LanguageConfig>
ThreadLoop()138 void UpdateRemsetThread<LanguageConfig>::ThreadLoop()
139 {
140 LOG(DEBUG, GC) << "Entering UpdateRemsetThread ThreadLoop";
141
142 loop_lock.Lock();
143 while (true) {
144 if (stop_thread_) {
145 LOG(DEBUG, GC) << "exit UpdateRemsetThread loop, because thread was stopped";
146 break;
147 }
148 if (pause_thread_) {
149 // gc is waiting for us to handle all updates
150 // possible improvements: let GC thread to help us to handle elements in queue in parallel, instead of
151 // waiting
152 ProcessAllCards(); // Process all cards inside gc
153 pause_thread_ = false;
154 thread_cond_var_.Signal(); // notify GC thread that we processed all updates
155 thread_cond_var_.Wait(&loop_lock); // let WaitUntilTasksEnd to finish
156 continue;
157 }
158 if (invalidate_regions_ != nullptr) {
159 for (const auto ®ion : *invalidate_regions_) {
160 // don't need lock because only update_remset_thread changes remsets
161 RemSet<>::template InvalidateRegion<false>(region);
162 }
163 invalidate_regions_ = nullptr;
164 thread_cond_var_.Signal();
165 Sleep();
166 }
167 if (need_invalidate_region_) {
168 Sleep();
169 continue;
170 }
171 auto processed_cards = ProcessAllCards();
172
173 auto phase = gc_->GetGCPhase();
174 if (phase != GCPhase::GC_PHASE_IDLE) { // means GC is in progress now and we need to process more aggressively
175 continue;
176 } else if (processed_cards < min_concurrent_cards_to_process_) { // NOLINT(readability-else-after-return)
177 Sleep();
178 }
179 }
180 loop_lock.Unlock();
181 LOG(DEBUG, GC) << "Exiting UpdateRemsetThread ThreadLoop";
182 }
183
184 template <class LanguageConfig>
HandleCard(CardTable::CardPtr card_ptr)185 void UpdateRemsetThread<LanguageConfig>::HandleCard(CardTable::CardPtr card_ptr)
186 {
187 LOG(DEBUG, GC) << "HandleCard card: " << ToVoidPtr(card_table_->GetCardStartAddress(card_ptr));
188 // TODO(alovkov): eliminate atomics in cards, force FullMemoryBarrier here, all mutators will be faster
189
190 // clear card before we process it, because parallel mutator thread can make a write and we would miss it
191 card_ptr->Clear();
192
193 auto rem_set_fill = [this](void *mem) {
194 auto object_header = static_cast<ObjectHeader *>(mem);
195 auto obj_ref_visitor = [this](ObjectHeader *from_obj, ObjectHeader *to_obj) {
196 auto to_obj_space_type = PoolManager::GetMmapMemPool()->GetSpaceTypeForAddr(to_obj);
197
198 switch (to_obj_space_type) {
199 case SpaceType::SPACE_TYPE_OBJECT:
200 case SpaceType::SPACE_TYPE_NON_MOVABLE_OBJECT:
201 case SpaceType::SPACE_TYPE_HUMONGOUS_OBJECT:
202 if (((ToUintPtr(from_obj) ^ ToUintPtr(to_obj)) >> region_size_bits_) != 0) {
203 // don't need lock because only update_remset_thread changes remsets
204 RemSet<>::AddRefWithAddr<false>(from_obj, to_obj);
205 LOG(DEBUG, GC) << "fill rem set " << std::hex << from_obj << " -> " << to_obj;
206 }
207 break;
208 default:
209 LOG(FATAL, GC) << "Not suitable space for to_obj: " << std::hex << to_obj;
210 break;
211 }
212 };
213 if (object_header->ClassAddr<BaseClass>() != nullptr) {
214 // Class may be null when we are visiting a card and at the same time a new non-movable object
215 // is allocated in the memory region covered by the card.
216 ObjectHelpers<LanguageConfig::LANG_TYPE>::TraverseAllObjects(object_header, obj_ref_visitor);
217 }
218 };
219 // process all cards
220 auto mmap_mempool = PoolManager::GetMmapMemPool();
221 auto space_type = mmap_mempool->GetSpaceTypeForAddr(ToVoidPtr(card_table_->GetCardStartAddress(card_ptr)));
222 switch (space_type) {
223 case SpaceType::SPACE_TYPE_OBJECT:
224 case SpaceType::SPACE_TYPE_NON_MOVABLE_OBJECT:
225 case SpaceType::SPACE_TYPE_HUMONGOUS_OBJECT: {
226 auto region = AddrToRegion(ToVoidPtr(card_table_->GetCardStartAddress(card_ptr)));
227 ASSERT(region != nullptr);
228 ASSERT(region->GetLiveBitmap() != nullptr);
229 region->GetLiveBitmap()->IterateOverMarkedChunkInRange<true>(
230 ToVoidPtr(card_table_->GetCardStartAddress(card_ptr)),
231 ToVoidPtr(card_table_->GetCardEndAddress(card_ptr)), rem_set_fill);
232 break;
233 }
234 default:
235 LOG(FATAL, GC) << "Invalid space type for the " << ToVoidPtr(card_table_->GetCardStartAddress(card_ptr));
236 break;
237 };
238 }
239
240 template <class LanguageConfig>
BuildSetFromQueue()241 void UpdateRemsetThread<LanguageConfig>::BuildSetFromQueue()
242 {
243 ASSERT(cards_.empty());
244 os::memory::LockHolder holder(*queue_lock_);
245 for (const auto &card : *queue_) {
246 cards_.insert(card);
247 }
248 queue_->clear();
249 }
250
251 template <class LanguageConfig>
DrainThreads()252 void UpdateRemsetThread<LanguageConfig>::DrainThreads()
253 {
254 ASSERT(vm_ != nullptr);
255 // NOLINTNEXTLINE(readability-braces-around-statements)
256 if constexpr (LanguageConfig::MT_MODE == MT_MODE_SINGLE) {
257 auto thread = vm_->GetAssociatedThread();
258 DrainThread(thread);
259 } else if (LanguageConfig::MT_MODE == MT_MODE_MULTI) { // NOLINT(readability-misleading-indentation)
260 auto *thread_manager = vm_->GetThreadManager();
261 ASSERT(thread_manager != nullptr);
262 // TODO(alovkov) if !inside_gc + too many from thread -> go to next thread.
263 thread_manager->EnumerateThreads(
264 [this](ManagedThread *thread) {
265 DrainThread(thread);
266 return true;
267 },
268 static_cast<unsigned int>(EnumerationFlag::ALL));
269 } else {
270 UNREACHABLE();
271 }
272 }
273
274 template <class LanguageConfig>
275
DrainThread(ManagedThread * thread)276 void UpdateRemsetThread<LanguageConfig>::DrainThread(ManagedThread *thread)
277 {
278 if (thread == nullptr) { // possible for ECMA
279 return;
280 }
281 auto cards = thread->GetG1PostBarrierBuffer();
282 if (cards == nullptr) {
283 return;
284 }
285 bool has_element;
286 while (true) {
287 mem::CardTable::CardPtr card;
288 has_element = cards->TryPop(&card);
289 if (!has_element) {
290 break;
291 }
292 cards_.insert(card);
293 }
294 }
295
296 template <class LanguageConfig>
ProcessAllCards()297 size_t UpdateRemsetThread<LanguageConfig>::ProcessAllCards()
298 {
299 BuildSetFromQueue();
300 DrainThreads();
301 auto cards_size = cards_.size();
302 if (!cards_.empty()) {
303 LOG(DEBUG, GC) << "Remset thread started process: " << cards_size << " cards";
304 }
305 for (const auto &card : cards_) {
306 HandleCard(card);
307 }
308 cards_.clear();
309 return cards_size;
310 }
311
312 TEMPLATE_CLASS_LANGUAGE_CONFIG(UpdateRemsetThread);
313 } // namespace panda::mem
314