/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "TimerThread"

#include <optional>
#include <sstream>
#include <unistd.h>
#include <vector>

#include <mediautils/MediaUtilsDelayed.h>
#include <mediautils/TimerThread.h>
#include <utils/Log.h>
#include <utils/ThreadDefs.h>

using namespace std::chrono_literals;

namespace android::mediautils {

extern std::string formatTime(std::chrono::system_clock::time_point t);
extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);

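// scheduleTask() creates a Request whose deadline is timeoutDuration past the current
// system time and registers it, together with the callback, with the monitor thread.
// The callback fires if the task is not cancelled before the deadline (plus any second chance).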
TimerThread::Handle TimerThread::scheduleTask(
        std::string_view tag, TimerCallback&& func,
        Duration timeoutDuration, Duration secondChanceDuration) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now +
            std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
            secondChanceDuration, gettid(), tag);
    return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
}

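// trackTask() registers a request with no timeout: it is held in mNoTimeoutMap
// (so it appears in the pending dump) until it is cancelled; no callback is involved.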
TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now,
            Duration{} /* secondChanceDuration */, gettid(), tag);
    return mNoTimeoutMap.add(std::move(request));
}

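// cancelTask() removes the request from whichever container owns the handle
// (see isNoTimeoutHandle()) and moves it to the retired queue so it can still be dumped.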
bool TimerThread::cancelTask(Handle handle) {
    std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
             mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
    if (!request) return false;
    mRetiredQueue.add(std::move(request));
    return true;
}

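// toString() assembles the debug dump: the current time, the second chance count,
// an optional analysis summary, the timeout / pending / retired request lists, and
// call stacks for any implicated threads.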
std::string TimerThread::toString(size_t retiredCount) const {
    // Note: These request queues are snapshotted very close together but
    // not at "identical" times as we don't use a class-wide lock.

    std::vector<std::shared_ptr<const Request>> timeoutRequests;
    std::vector<std::shared_ptr<const Request>> retiredRequests;
    mTimeoutQueue.copyRequests(timeoutRequests);
    mRetiredQueue.copyRequests(retiredRequests, retiredCount);
    std::vector<std::shared_ptr<const Request>> pendingRequests =
        getPendingRequests();

    struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests);
    std::string analysisSummary;
    if (!analysis.summary.empty()) {
        analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]");
    }
    std::string timeoutStack;
    if (analysis.timeoutTid != -1) {
        timeoutStack = std::string("\ntimeout(")
                .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n")
                .append(getCallStackStringForTid(analysis.timeoutTid)).append("]");
    }
    std::string blockedStack;
    if (analysis.HALBlockedTid != -1) {
        blockedStack = std::string("\nblocked(")
                .append(std::to_string(analysis.HALBlockedTid)).append(")  callstack [\n")
                .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]");
    }

    return std::string("now ")
            .append(formatTime(std::chrono::system_clock::now()))
            .append("\nsecondChanceCount ")
            .append(std::to_string(mMonitorThread.getSecondChanceCount()))
            .append(analysisSummary)
            .append("\ntimeout [ ")
            .append(requestsToString(timeoutRequests))
            .append(" ]\npending [ ")
            .append(requestsToString(pendingRequests))
            .append(" ]\nretired [ ")
            .append(requestsToString(retiredRequests))
            .append(" ]")
            .append(timeoutStack)
            .append(blockedStack);
}

// A HAL method is identified by the substring "Hidl" in the class name.
// The tag should look like: ... Hidl ... :: ...
// When the audio HAL is updated to AIDL, perhaps we will instead use
// a global directory of HAL classes.
//
// See MethodStatistics.cpp:
// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
//
/* static */
bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
    const size_t hidlPos = request->tag.asStringView().find("Hidl");
    if (hidlPos == std::string::npos) return false;
    // There should be a separator after "Hidl", which indicates the substring
    // was part of the class name.
    const size_t separatorPos = request->tag.asStringView().find("::", hidlPos);
    return separatorPos != std::string::npos;
}

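// analyzeTimeout() tries to attribute a timeout to a pending HAL request: an "exact"
// match is a pending request on the same tid as the timeout; a "possible" match is one
// scheduled within kPendingDuration of the timeout request.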
/* static */
struct TimerThread::Analysis TimerThread::analyzeTimeout(
    const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
    const std::vector<std::shared_ptr<const Request>>& pendingRequests) {

    if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say.

    // For now, look at the last timeout (in our case, the only timeout).
    const std::shared_ptr<const Request> timeout = timeoutRequests.back();

    // Pending Requests that are problematic.
    std::vector<std::shared_ptr<const Request>> pendingExact;
    std::vector<std::shared_ptr<const Request>> pendingPossible;

    // We look at pending requests that were scheduled no later than kPendingDuration
    // after the timeout request. This prevents false matches with calls
    // that naturally block for a short period of time
    // such as HAL write() and read().
    //
    constexpr Duration kPendingDuration = 1000ms;
    for (const auto& pending : pendingRequests) {
        // If the pending tid is the same as the timeout tid, the problem is identified.
        if (pending->tid == timeout->tid) {
            pendingExact.emplace_back(pending);
            continue;
        }

        // If the pending request was scheduled within the time limit.
        if (pending->scheduled - timeout->scheduled < kPendingDuration) {
            pendingPossible.emplace_back(pending);
        }
    }

    struct Analysis analysis{};

    analysis.timeoutTid = timeout->tid;
    std::string& summary = analysis.summary;
    if (!pendingExact.empty()) {
        const auto& request = pendingExact.front();
        const bool hal = isRequestFromHal(request);

        if (hal) {
            summary = std::string("Blocked directly due to HAL call: ")
                .append(request->toString());
        }
    }
    if (summary.empty() && !pendingPossible.empty()) {
        for (const auto& request : pendingPossible) {
            const bool hal = isRequestFromHal(request);
            if (hal) {
                // The first blocked call is the most likely one.
                // Recent calls might be temporarily blocked
                // calls such as write() or read() depending on kPendingDuration.
                summary = std::string("Blocked possibly due to HAL call: ")
                    .append(request->toString());
                analysis.HALBlockedTid = request->tid;
            }
        }
    }
    return analysis;
}

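// Returns a snapshot of all pending requests (monitored and no-timeout), sorted by
// scheduled time.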
std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
    constexpr size_t kEstimatedPendingRequests = 8;  // approx 128 byte alloc.
    std::vector<std::shared_ptr<const Request>> pendingRequests;
    pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector outside of lock.

    // The following are internally locked calls, which add to our local pendingRequests.
    mMonitorThread.copyRequests(pendingRequests);
    mNoTimeoutMap.copyRequests(pendingRequests);

    // Sort in order of scheduled time.
    std::sort(pendingRequests.begin(), pendingRequests.end(),
        [](const std::shared_ptr<const Request>& r1,
           const std::shared_ptr<const Request>& r2) {
               return r1->scheduled < r2->scheduled;
           });
    return pendingRequests;
}

std::string TimerThread::pendingToString() const {
    return requestsToString(getPendingRequests());
}

std::string TimerThread::retiredToString(size_t n) const {
    std::vector<std::shared_ptr<const Request>> retiredRequests;
    mRetiredQueue.copyRequests(retiredRequests, n);

    // Dump to string
    return requestsToString(retiredRequests);
}

std::string TimerThread::timeoutToString(size_t n) const {
    std::vector<std::shared_ptr<const Request>> timeoutRequests;
    mTimeoutQueue.copyRequests(timeoutRequests, n);

    // Dump to string
    return requestsToString(timeoutRequests);
}

std::string TimerThread::Request::toString() const {
    const auto scheduledString = formatTime(scheduled);
    const auto deadlineString = formatTime(deadline);
    return std::string(tag)
        .append(" scheduled ").append(scheduledString)
        .append(" deadline ").append(timeSuffix(scheduledString, deadlineString))
        .append(" tid ").append(std::to_string(tid));
}

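// RequestQueue is a bounded history (used for the retired and timeout queues); once
// mRequestQueueMax entries are present, the oldest entry is evicted.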
void TimerThread::RequestQueue::add(std::shared_ptr<const Request> request) {
    std::lock_guard lg(mRQMutex);
    mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request));
    if (mRequestQueue.size() > mRequestQueueMax) {
        mRequestQueue.pop_front();
    }
}

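// copyRequests() appends at most the n most recent requests, oldest first.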
void TimerThread::RequestQueue::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests, size_t n) const {
    std::lock_guard lg(mRQMutex);
    const size_t size = mRequestQueue.size();
    size_t i = n >= size ? 0 : size - n;
    for (; i < size; ++i) {
        const auto &[time, request] = mRequestQueue[i];
        requests.emplace_back(request);
    }
}

TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
    std::lock_guard lg(mNTMutex);
    // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1).
    // This need not be done under a lock, but we do so anyhow.
    const Handle handle = getUniqueHandle_l();
    mMap[handle] = request;
    return handle;
}

std::shared_ptr<const TimerThread::Request> TimerThread::NoTimeoutMap::remove(Handle handle) {
    std::lock_guard lg(mNTMutex);
    auto it = mMap.find(handle);
    if (it == mMap.end()) return {};
    auto request = it->second;
    mMap.erase(it);
    return request;
}

void TimerThread::NoTimeoutMap::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests) const {
    std::lock_guard lg(mNTMutex);
    for (const auto &[handle, request] : mMap) {
        requests.emplace_back(request);
    }
}

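// The monitor thread watches for expired deadlines. It is named "TimerThread" and runs
// at PRIORITY_URGENT_AUDIO, presumably so that timeouts are detected promptly even when
// the system is loaded.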
TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
        : mTimeoutQueue(timeoutQueue)
        , mThread([this] { threadFunc(); }) {
    pthread_setname_np(mThread.native_handle(), "TimerThread");
    pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO);
}

TimerThread::MonitorThread::~MonitorThread() {
    {
        std::lock_guard _l(mMutex);
        mShouldExit = true;
        mCond.notify_all();
    }
    mThread.join();
}

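// threadFunc() sleeps until the earliest deadline among the monitored and second chance
// requests. An expired request either receives a second chance (if secondChanceDuration
// is nonzero) or has its TimerCallback invoked with the lock dropped.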
void TimerThread::MonitorThread::threadFunc() {
    std::unique_lock _l(mMutex);
    while (!mShouldExit) {
        Handle nextDeadline = INVALID_HANDLE;
        Handle now = INVALID_HANDLE;
        if (!mMonitorRequests.empty()) {
            nextDeadline = mMonitorRequests.begin()->first;
            now = std::chrono::steady_clock::now();
            if (nextDeadline < now) {
                auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                // Deadline has expired, handle the request.
                auto secondChanceDuration = node.mapped().first->secondChanceDuration;
                if (secondChanceDuration.count() != 0) {
                    // We now apply the second chance duration to find the clock
                    // monotonic second deadline.  The unique key is then the
                    // pair<second_deadline, first_deadline>.
                    //
                    // The second chance prevents a false timeout should there be
                    // any clock monotonic advancement during suspend.
                    auto newHandle = now + secondChanceDuration;
                    ALOGD("%s: TimeCheck second chance applied for %s",
                            __func__, node.mapped().first->tag.c_str()); // should be a rare event.
                    mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
                            std::make_pair(newHandle, nextDeadline),
                            std::move(node.mapped()));
                    // increment second chance counter.
                    mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
                } else {
                    {
                        _l.unlock();
                        // We add the Request to the timeout queue early so that it can be dumped out.
                        mTimeoutQueue.add(std::move(node.mapped().first));
                        node.mapped().second(nextDeadline);
                        // Caution: we don't hold the lock when we call the TimerCallback,
                        // but this is the timeout case!  We will crash soon,
                        // maybe before returning.
                        // Anything left over is released here outside the lock.
                    }
                    // Reacquire the lock - if something was added, we loop immediately to check.
                    _l.lock();
                }
                // Always process expiring monitor requests first.
                continue;
            }
        }
        // Now process any second chance requests.
        if (!mSecondChanceRequests.empty()) {
            Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
            if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
            if (secondDeadline < now) {
                auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
                {
                    _l.unlock();
                    // We add the Request to the timeout queue early so that it can be dumped out.
                    mTimeoutQueue.add(std::move(node.mapped().first));
                    const Handle originalHandle = node.key().second;
                    node.mapped().second(originalHandle);
                    // Caution: we don't hold the lock when we call the TimerCallback.
                    // This is a benign issue - we permit concurrent operations
                    // while in the callback to the MonitorQueue.
                    //
                    // Anything left over is released here outside the lock.
                }
                // Reacquire the lock - if something was added, we loop immediately to check.
                _l.lock();
                continue;
            }
            // Update the deadline.
            if (nextDeadline == INVALID_HANDLE) {
                nextDeadline = secondDeadline;
            } else {
                nextDeadline = std::min(nextDeadline, secondDeadline);
            }
        }
        if (nextDeadline != INVALID_HANDLE) {
            mCond.wait_until(_l, nextDeadline);
        } else {
            mCond.wait(_l);
        }
    }
}

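// add() registers the request under a unique deadline-derived handle and wakes the
// monitor thread so it can recompute its wait deadline.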
TimerThread::Handle TimerThread::MonitorThread::add(
        std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
    std::lock_guard _l(mMutex);
    const Handle handle = getUniqueHandle_l(timeout);
    mMonitorRequests.emplace_hint(mMonitorRequests.end(),
            handle, std::make_pair(std::move(request), std::move(func)));
    mCond.notify_all();
    return handle;
}

std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
    std::pair<std::shared_ptr<const Request>, TimerCallback> data;
    std::unique_lock ul(mMutex);
    if (const auto it = mMonitorRequests.find(handle);
        it != mMonitorRequests.end()) {
        data = std::move(it->second);
        mMonitorRequests.erase(it);
        ul.unlock();  // manually release lock here so func (data.second)
                      // is released outside of lock.
        return data.first;  // request
    }

    // This check is O(N), but since the second chance requests are ordered
    // by earliest expiration time, we would expect better than average results.
    for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
        if (it->first.second == handle) {
            data = std::move(it->second);
            mSecondChanceRequests.erase(it);
            ul.unlock();  // manually release lock here so func (data.second)
                          // is released outside of lock.
            return data.first; // request
        }
    }
    return {};
}

void TimerThread::MonitorThread::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests) const {
    std::lock_guard lg(mMutex);
    for (const auto &[deadline, monitorpair] : mMonitorRequests) {
        requests.emplace_back(monitorpair.first);
    }
    // We combine the second map with the first map - this is
    // everything that is pending on the monitor thread.
    // The second map will be older than the first map so this
    // is in order.
    for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
        requests.emplace_back(monitorpair.first);
    }
}

}  // namespace android::mediautils