/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/cpp/thread_manager/thread_manager.h"

#include <climits>

#include <grpc/support/log.h>
#include "src/core/lib/gprpp/thd.h"
#include "src/core/lib/iomgr/exec_ctx.h"

namespace grpc {

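// ThreadManager maintains a self-sizing pool of polling threads: it keeps at
// least min_pollers_ threads blocked in PollForWork(), lets the pool grow up
// to max_pollers_ under load, and charges every thread against the
// grpc_resource_quota supplied at construction.
//
// Illustrative sketch (not part of this file) of how an owner could drive a
// ThreadManager; `MyThreadManager` is a hypothetical subclass implementing
// the pure-virtual PollForWork()/DoWork() declared in thread_manager.h:
//
//   MyThreadManager tm("my-tm", quota, /*min_pollers=*/2, /*max_pollers=*/4);
//   tm.Initialize();  // Spawns the initial min_pollers_ worker threads.
//   // ... serve traffic ...
//   tm.Shutdown();    // Ask the pollers to wind down.
//   tm.Wait();        // Block until the last worker has exited.
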
ThreadManager::WorkerThread::WorkerThread(ThreadManager* thd_mgr)
    : thd_mgr_(thd_mgr) {
  // Make thread creation exclusive with respect to its join happening in
  // ~WorkerThread().
  thd_ = grpc_core::Thread(
      "grpcpp_sync_server",
      [](void* th) { static_cast<ThreadManager::WorkerThread*>(th)->Run(); },
      this, &created_);
  if (!created_) {
    gpr_log(GPR_ERROR, "Could not create grpcpp_sync_server worker-thread");
  }
}

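// Runs the manager's main work loop, then hands this WorkerThread back to the
// manager for deferred deletion: a thread cannot join (and hence delete)
// itself, so cleanup happens later via CleanupCompletedThreads().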
void ThreadManager::WorkerThread::Run() {
  thd_mgr_->MainWorkLoop();
  thd_mgr_->MarkAsCompleted(this);
}

ThreadManager::WorkerThread::~WorkerThread() {
  // Don't join until the thread is fully constructed.
  thd_.Join();
}

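// A max_pollers value of -1 means "unlimited". Thread usage is accounted
// against resource_quota through resource_user_, which is released in the
// destructor.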
ThreadManager::ThreadManager(const char* name,
                             grpc_resource_quota* resource_quota,
                             int min_pollers, int max_pollers)
    : shutdown_(false),
      num_pollers_(0),
      min_pollers_(min_pollers),
      max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
      num_threads_(0),
      max_active_threads_sofar_(0) {
  resource_user_ = grpc_resource_user_create(resource_quota, name);
}

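// The destructor requires that all worker threads have already exited; call
// Shutdown() and Wait() first.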
ThreadManager::~ThreadManager() {
  {
    grpc_core::MutexLock lock(&mu_);
    GPR_ASSERT(num_threads_ == 0);
  }

  grpc_core::ExecCtx exec_ctx;  // grpc_resource_user_unref needs an exec_ctx
  grpc_resource_user_unref(resource_user_);
  CleanupCompletedThreads();
}

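// Blocks until the last worker thread has called MarkAsCompleted(), i.e.
// until num_threads_ drops to zero.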
void ThreadManager::Wait() {
  grpc_core::MutexLock lock(&mu_);
  while (num_threads_ != 0) {
    shutdown_cv_.Wait(&mu_);
  }
}

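// Only sets the shutdown flag; each poller observes it on its next pass
// through MainWorkLoop() and retires itself.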
void ThreadManager::Shutdown() {
  grpc_core::MutexLock lock(&mu_);
  shutdown_ = true;
}

bool ThreadManager::IsShutdown() {
  grpc_core::MutexLock lock(&mu_);
  return shutdown_;
}

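// Returns the high-water mark of concurrently live worker threads.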
int ThreadManager::GetMaxActiveThreadsSoFar() {
  grpc_core::MutexLock list_lock(&list_mu_);
  return max_active_threads_sofar_;
}

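// Called by a worker on its way out: queues this WorkerThread for deletion by
// another thread, decrements the live-thread count (waking Wait() when it
// reaches zero), and returns the thread to the resource quota.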
void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
  {
    grpc_core::MutexLock list_lock(&list_mu_);
    completed_threads_.push_back(thd);
  }

  {
    grpc_core::MutexLock lock(&mu_);
    num_threads_--;
    if (num_threads_ == 0) {
      shutdown_cv_.Signal();
    }
  }

  // Give a thread back to the resource quota
  grpc_resource_user_free_threads(resource_user_, 1);
}

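// Deletes workers that have finished running. Deleting a WorkerThread joins
// its underlying thread (see ~WorkerThread), so this must never run on one of
// the threads it is deleting.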
void ThreadManager::CleanupCompletedThreads() {
  std::list<WorkerThread*> completed_threads;
  {
    // swap out the completed threads list: allows other threads to clean up
    // more quickly
    grpc_core::MutexLock lock(&list_mu_);
    completed_threads.swap(completed_threads_);
  }
  for (auto thd : completed_threads) delete thd;
}

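// Spawns the initial set of min_pollers_ polling threads. Aborts the process
// if the resource quota cannot cover even that minimum.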
void ThreadManager::Initialize() {
  if (!grpc_resource_user_allocate_threads(resource_user_, min_pollers_)) {
    gpr_log(GPR_ERROR,
            "No thread quota available to create even the minimum required "
            "polling threads (i.e. %d). Unable to start the thread manager",
            min_pollers_);
    abort();
  }

  {
    grpc_core::MutexLock lock(&mu_);
    num_pollers_ = min_pollers_;
    num_threads_ = min_pollers_;
    max_active_threads_sofar_ = min_pollers_;
  }

  for (int i = 0; i < min_pollers_; i++) {
    WorkerThread* worker = new WorkerThread(this);
    GPR_ASSERT(worker->created());  // Must be able to create the minimum
    worker->Start();
  }
}

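// The body of every worker thread. Each iteration polls for work and then,
// based on the result:
//   - TIMEOUT: retire this thread if we are shut down or have more pollers
//     than max_pollers_.
//   - SHUTDOWN: retire this thread.
//   - WORK_FOUND: if polling is now under-staffed, try to spawn a replacement
//     poller (subject to the resource quota) before doing the work here.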
void ThreadManager::MainWorkLoop() {
  while (true) {
    void* tag;
    bool ok;
    WorkStatus work_status = PollForWork(&tag, &ok);

    grpc_core::ReleasableMutexLock lock(&mu_);
    // Reduce the number of pollers by 1 and check what happened with the poll
    num_pollers_--;
    bool done = false;
    switch (work_status) {
      case TIMEOUT:
        // If we timed out and we have more pollers than we need (or we are
        // shutdown), finish this thread
        if (shutdown_ || num_pollers_ > max_pollers_) done = true;
        break;
      case SHUTDOWN:
        // If the thread manager is shutdown, finish this thread
        done = true;
        break;
      case WORK_FOUND:
        // If we got work and there are now insufficient pollers and there is
        // quota available to create a new thread, start a new poller thread
        bool resource_exhausted = false;
        if (!shutdown_ && num_pollers_ < min_pollers_) {
          if (grpc_resource_user_allocate_threads(resource_user_, 1)) {
            // We can allocate a new poller thread
            num_pollers_++;
            num_threads_++;
            if (num_threads_ > max_active_threads_sofar_) {
              max_active_threads_sofar_ = num_threads_;
            }
            // Drop lock before spawning thread to avoid contention
            lock.Unlock();
            WorkerThread* worker = new WorkerThread(this);
            if (worker->created()) {
              worker->Start();
            } else {
              // Thread creation failed: re-acquire the lock before undoing
              // the counter updates, since mu_ guards them.
              grpc_core::MutexLock failure_lock(&mu_);
              num_pollers_--;
              num_threads_--;
              resource_exhausted = true;
              delete worker;
            }
          } else if (num_pollers_ > 0) {
            // There is still at least some thread polling, so we can go on
            // even though we are below the number of pollers that we would
            // like to have (min_pollers_)
            lock.Unlock();
          } else {
            // There are no pollers to spare and we couldn't allocate
            // a new thread, so resources are exhausted!
            lock.Unlock();
            resource_exhausted = true;
          }
        } else {
          // There are a sufficient number of pollers available so we can do
          // the work and continue polling with our existing poller threads
          lock.Unlock();
        }
        // The lock is always released at this point: do the application work,
        // or return resource-exhausted if there is new work but we couldn't
        // get a thread to run it in.
        DoWork(tag, ok, !resource_exhausted);
        // Take the lock again to check post conditions
        lock.Lock();
        // If we're shutdown, we should finish at this point.
        if (shutdown_) done = true;
        break;
    }
    // If we decided to finish the thread, break out of the while loop
    if (done) break;

    // Otherwise go back to polling as long as it doesn't exceed max_pollers_
    //
    // **WARNING**:
    // There is a possibility of threads thrashing here (i.e. more thread
    // shutdowns and creations than in the ideal case). This happens if
    // max_pollers_ is small and the rate of incoming requests is also small.
    // In such scenarios we can possibly configure max_pollers_ to a higher
    // value and/or increase the cq timeout.
    //
    // However, not doing this check here and unconditionally incrementing
    // num_pollers (and hoping that the system will eventually settle down)
    // has far worse consequences, i.e. a huge number of threads getting
    // created to the point of thread exhaustion. For example: if the incoming
    // request rate is very high, all the polling threads will return very
    // quickly from PollForWork() with WORK_FOUND. They all briefly decrement
    // the num_pollers_ counter, thereby possibly (and briefly) making it go
    // below min_pollers_; this will most likely result in the creation of a
    // new poller since num_pollers_ dipped below min_pollers_.
    //
    // Now, if we didn't do the max_pollers_ check here, all these threads
    // would go back to doing PollForWork() and the whole cycle would repeat
    // (with a new thread being added in each cycle). Once the total number of
    // threads in the system crosses a certain threshold (around ~1500), there
    // is heavy contention on mutexes (the mu_ here or the mutexes in gRPC
    // core like the pollset mutex) that makes DoWork() take longer to finish,
    // thereby causing new poller threads to be created even faster. This
    // results in a thread avalanche.
    if (num_pollers_ < max_pollers_) {
      num_pollers_++;
    } else {
      break;
    }
  }

  // This thread is exiting. Do some cleanup work, i.e. delete already
  // completed worker threads.
  CleanupCompletedThreads();

  // If we are here, either ThreadManager is shutting down or it already has
  // enough threads.
}

}  // namespace grpc