• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright 2016 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include "src/cpp/thread_manager/thread_manager.h"
20 
21 #include <climits>
22 
23 #include <grpc/support/log.h>
24 #include "src/core/lib/gprpp/thd.h"
25 #include "src/core/lib/iomgr/exec_ctx.h"
26 
27 namespace grpc {
28 
WorkerThread(ThreadManager * thd_mgr)29 ThreadManager::WorkerThread::WorkerThread(ThreadManager* thd_mgr)
30     : thd_mgr_(thd_mgr) {
31   // Make thread creation exclusive with respect to its join happening in
32   // ~WorkerThread().
33   thd_ = grpc_core::Thread(
34       "grpcpp_sync_server",
35       [](void* th) { static_cast<ThreadManager::WorkerThread*>(th)->Run(); },
36       this, &created_);
37   if (!created_) {
38     gpr_log(GPR_ERROR, "Could not create grpc_sync_server worker-thread");
39   }
40 }
41 
Run()42 void ThreadManager::WorkerThread::Run() {
43   thd_mgr_->MainWorkLoop();
44   thd_mgr_->MarkAsCompleted(this);
45 }
46 
~WorkerThread()47 ThreadManager::WorkerThread::~WorkerThread() {
48   // Don't join until the thread is fully constructed.
49   thd_.Join();
50 }
51 
ThreadManager(const char * name,grpc_resource_quota * resource_quota,int min_pollers,int max_pollers)52 ThreadManager::ThreadManager(const char* name,
53                              grpc_resource_quota* resource_quota,
54                              int min_pollers, int max_pollers)
55     : shutdown_(false),
56       num_pollers_(0),
57       min_pollers_(min_pollers),
58       max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
59       num_threads_(0),
60       max_active_threads_sofar_(0) {
61   resource_user_ = grpc_resource_user_create(resource_quota, name);
62 }
63 
~ThreadManager()64 ThreadManager::~ThreadManager() {
65   {
66     grpc_core::MutexLock lock(&mu_);
67     GPR_ASSERT(num_threads_ == 0);
68   }
69 
70   grpc_core::ExecCtx exec_ctx;  // grpc_resource_user_unref needs an exec_ctx
71   grpc_resource_user_unref(resource_user_);
72   CleanupCompletedThreads();
73 }
74 
Wait()75 void ThreadManager::Wait() {
76   grpc_core::MutexLock lock(&mu_);
77   while (num_threads_ != 0) {
78     shutdown_cv_.Wait(&mu_);
79   }
80 }
81 
Shutdown()82 void ThreadManager::Shutdown() {
83   grpc_core::MutexLock lock(&mu_);
84   shutdown_ = true;
85 }
86 
IsShutdown()87 bool ThreadManager::IsShutdown() {
88   grpc_core::MutexLock lock(&mu_);
89   return shutdown_;
90 }
91 
GetMaxActiveThreadsSoFar()92 int ThreadManager::GetMaxActiveThreadsSoFar() {
93   grpc_core::MutexLock list_lock(&list_mu_);
94   return max_active_threads_sofar_;
95 }
96 
MarkAsCompleted(WorkerThread * thd)97 void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
98   {
99     grpc_core::MutexLock list_lock(&list_mu_);
100     completed_threads_.push_back(thd);
101   }
102 
103   {
104     grpc_core::MutexLock lock(&mu_);
105     num_threads_--;
106     if (num_threads_ == 0) {
107       shutdown_cv_.Signal();
108     }
109   }
110 
111   // Give a thread back to the resource quota
112   grpc_resource_user_free_threads(resource_user_, 1);
113 }
114 
CleanupCompletedThreads()115 void ThreadManager::CleanupCompletedThreads() {
116   std::list<WorkerThread*> completed_threads;
117   {
118     // swap out the completed threads list: allows other threads to clean up
119     // more quickly
120     grpc_core::MutexLock lock(&list_mu_);
121     completed_threads.swap(completed_threads_);
122   }
123   for (auto thd : completed_threads) delete thd;
124 }
125 
Initialize()126 void ThreadManager::Initialize() {
127   if (!grpc_resource_user_allocate_threads(resource_user_, min_pollers_)) {
128     gpr_log(GPR_ERROR,
129             "No thread quota available to even create the minimum required "
130             "polling threads (i.e %d). Unable to start the thread manager",
131             min_pollers_);
132     abort();
133   }
134 
135   {
136     grpc_core::MutexLock lock(&mu_);
137     num_pollers_ = min_pollers_;
138     num_threads_ = min_pollers_;
139     max_active_threads_sofar_ = min_pollers_;
140   }
141 
142   for (int i = 0; i < min_pollers_; i++) {
143     WorkerThread* worker = new WorkerThread(this);
144     GPR_ASSERT(worker->created());  // Must be able to create the minimum
145     worker->Start();
146   }
147 }
148 
MainWorkLoop()149 void ThreadManager::MainWorkLoop() {
150   while (true) {
151     void* tag;
152     bool ok;
153     WorkStatus work_status = PollForWork(&tag, &ok);
154 
155     grpc_core::LockableAndReleasableMutexLock lock(&mu_);
156     // Reduce the number of pollers by 1 and check what happened with the poll
157     num_pollers_--;
158     bool done = false;
159     switch (work_status) {
160       case TIMEOUT:
161         // If we timed out and we have more pollers than we need (or we are
162         // shutdown), finish this thread
163         if (shutdown_ || num_pollers_ > max_pollers_) done = true;
164         break;
165       case SHUTDOWN:
166         // If the thread manager is shutdown, finish this thread
167         done = true;
168         break;
169       case WORK_FOUND:
170         // If we got work and there are now insufficient pollers and there is
171         // quota available to create a new thread, start a new poller thread
172         bool resource_exhausted = false;
173         if (!shutdown_ && num_pollers_ < min_pollers_) {
174           if (grpc_resource_user_allocate_threads(resource_user_, 1)) {
175             // We can allocate a new poller thread
176             num_pollers_++;
177             num_threads_++;
178             if (num_threads_ > max_active_threads_sofar_) {
179               max_active_threads_sofar_ = num_threads_;
180             }
181             // Drop lock before spawning thread to avoid contention
182             lock.Release();
183             WorkerThread* worker = new WorkerThread(this);
184             if (worker->created()) {
185               worker->Start();
186             } else {
187               // Get lock again to undo changes to poller/thread counters.
188               grpc_core::MutexLock failure_lock(&mu_);
189               num_pollers_--;
190               num_threads_--;
191               resource_exhausted = true;
192               delete worker;
193             }
194           } else if (num_pollers_ > 0) {
195             // There is still at least some thread polling, so we can go on
196             // even though we are below the number of pollers that we would
197             // like to have (min_pollers_)
198             lock.Release();
199           } else {
200             // There are no pollers to spare and we couldn't allocate
201             // a new thread, so resources are exhausted!
202             lock.Release();
203             resource_exhausted = true;
204           }
205         } else {
206           // There are a sufficient number of pollers available so we can do
207           // the work and continue polling with our existing poller threads
208           lock.Release();
209         }
210         // Lock is always released at this point - do the application work
211         // or return resource exhausted if there is new work but we couldn't
212         // get a thread in which to do it.
213         DoWork(tag, ok, !resource_exhausted);
214         // Take the lock again to check post conditions
215         lock.Lock();
216         // If we're shutdown, we should finish at this point.
217         if (shutdown_) done = true;
218         break;
219     }
220     // If we decided to finish the thread, break out of the while loop
221     if (done) break;
222 
223     // Otherwise go back to polling as long as it doesn't exceed max_pollers_
224     //
225     // **WARNING**:
226     // There is a possibility of threads thrashing here (i.e excessive thread
227     // shutdowns and creations than the ideal case). This happens if max_poller_
228     // count is small and the rate of incoming requests is also small. In such
229     // scenarios we can possibly configure max_pollers_ to a higher value and/or
230     // increase the cq timeout.
231     //
232     // However, not doing this check here and unconditionally incrementing
233     // num_pollers (and hoping that the system will eventually settle down) has
234     // far worse consequences i.e huge number of threads getting created to the
235     // point of thread-exhaustion. For example: if the incoming request rate is
236     // very high, all the polling threads will return very quickly from
237     // PollForWork() with WORK_FOUND. They all briefly decrement num_pollers_
238     // counter thereby possibly - and briefly - making it go below min_pollers;
239     // This will most likely result in the creation of a new poller since
240     // num_pollers_ dipped below min_pollers_.
241     //
242     // Now, If we didn't do the max_poller_ check here, all these threads will
243     // go back to doing PollForWork() and the whole cycle repeats (with a new
244     // thread being added in each cycle). Once the total number of threads in
245     // the system crosses a certain threshold (around ~1500), there is heavy
246     // contention on mutexes (the mu_ here or the mutexes in gRPC core like the
247     // pollset mutex) that makes DoWork() take longer to finish thereby causing
248     // new poller threads to be created even faster. This results in a thread
249     // avalanche.
250     if (num_pollers_ < max_pollers_) {
251       num_pollers_++;
252     } else {
253       break;
254     }
255   };
256 
257   // This thread is exiting. Do some cleanup work i.e delete already completed
258   // worker threads
259   CleanupCompletedThreads();
260 
261   // If we are here, either ThreadManager is shutting down or it already has
262   // enough threads.
263 }
264 
265 }  // namespace grpc
266