/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "TimerThread"

#include <algorithm>
#include <optional>
#include <sstream>
#include <unistd.h>
#include <vector>

#include <mediautils/MediaUtilsDelayed.h>
#include <mediautils/TidWrapper.h>
#include <mediautils/TimerThread.h>
#include <utils/Log.h>
#include <utils/ThreadDefs.h>

using namespace std::chrono_literals;

namespace android::mediautils {

extern std::string formatTime(std::chrono::system_clock::time_point t);
extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);

TimerThread::Handle TimerThread::scheduleTask(
        std::string_view tag, TimerCallback&& func,
        Duration timeoutDuration, Duration secondChanceDuration) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now +
            std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
            secondChanceDuration, getThreadIdWrapper(), tag);
    return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
}

TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
    const auto now = std::chrono::system_clock::now();
    auto request = std::make_shared<const Request>(now, now,
            Duration{} /* secondChanceDuration */, getThreadIdWrapper(), tag);
    return mNoTimeoutMap.add(std::move(request));
}

bool TimerThread::cancelTask(Handle handle) {
    std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
             mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
    if (!request) return false;
    mRetiredQueue.add(std::move(request));
    return true;
}
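
// Illustrative usage sketch (comment only, not part of this translation unit).
// The tag and durations below are hypothetical; the only assumption is that a
// TimerCallback is invocable with the timer Handle, as the MonitorThread does
// further down in this file.
//
//     TimerThread timer;
//     auto handle = timer.scheduleTask(
//             "IStreamOutExample::write" /* tag */,
//             [](TimerThread::Handle) { /* timeout action, e.g. log or abort */ },
//             3s /* timeoutDuration */, 1s /* secondChanceDuration */);
//     // ... perform the monitored work ...
//     timer.cancelTask(handle);  // Returns false if the task had already timed
//                                // out or was otherwise no longer pending.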

std::string TimerThread::SnapshotAnalysis::toString() const {
    // Note: These request queues are snapshotted very close together, but
    // not at identical times, as we don't use a class-wide lock.
    std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]");
    std::string timeoutStack;
    std::string blockedStack;
    if (timeoutTid != -1) {
        timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(")
                .append(std::to_string(timeoutTid)).append(") callstack [\n")
                .append(getCallStackStringForTid(timeoutTid)).append("]");
    }

    if (suspectTid != -1 && suspectTid != timeoutTid) {
        blockedStack = std::string("\nblocked(")
                .append(std::to_string(suspectTid)).append(")  callstack [\n")
                .append(getCallStackStringForTid(suspectTid)).append("]");
    }

    return std::string("now ")
            .append(formatTime(std::chrono::system_clock::now()))
            .append("\nsecondChanceCount ")
            .append(std::to_string(secondChanceCount))
            .append(analysisSummary)
            .append("\ntimeout [ ")
            .append(requestsToString(timeoutRequests))
            .append(" ]\npending [ ")
            .append(requestsToString(pendingRequests))
            .append(" ]\nretired [ ")
            .append(requestsToString(retiredRequests))
            .append(" ]")
            .append(timeoutStack)
            .append(blockedStack);
}

// A HAL method is one where the substring "Hidl" appears in the class name.
// The tag should look like: ... Hidl ... :: ...
// When the audio HAL is updated to AIDL, perhaps we will instead use
// a global directory of HAL classes.
//
// See MethodStatistics.cpp:
// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
//
/* static */
bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
    const size_t hidlPos = request->tag.asStringView().find("Hidl");
    if (hidlPos == std::string::npos) return false;
    // There should be a "::" separator after "Hidl", which indicates that
    // the substring was part of the class name.
    const size_t separatorPos = request->tag.asStringView().find("::", hidlPos);
    return separatorPos != std::string::npos;
}
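
// Illustrative tag examples for the heuristic above (class names are hypothetical):
//     "StreamOutHalHidlExample::write" -> contains "Hidl" followed by "::" -> HAL request.
//     "HidlExampleUtils"               -> "Hidl" without a "::" afterwards -> not a HAL request.
//     "ThreadBaseExample::threadLoop"  -> no "Hidl" substring              -> not a HAL request.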

struct TimerThread::SnapshotAnalysis TimerThread::getSnapshotAnalysis(size_t retiredCount) const {
    struct SnapshotAnalysis analysis{};
    // The following snapshot of the TimerThread state will be utilized for
    // analysis. Note, there is no lock around these calls, so there could be
    // a state update between them.
    mTimeoutQueue.copyRequests(analysis.timeoutRequests);
    mRetiredQueue.copyRequests(analysis.retiredRequests, retiredCount);
    analysis.pendingRequests = getPendingRequests();
    analysis.secondChanceCount = mMonitorThread.getSecondChanceCount();
    // No call has timed out, so there is no analysis to be done.
    if (analysis.timeoutRequests.empty())
        return analysis;
    // For now, look at the last timeout (in our case, the only timeout).
    const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back();
    analysis.timeoutTid = timeout->tid;
    if (analysis.pendingRequests.empty())
        return analysis;
    // Pending requests that are problematic.
    std::vector<std::shared_ptr<const Request>> pendingExact;
    std::vector<std::shared_ptr<const Request>> pendingPossible;

    // We look at pending requests that were scheduled no later than kPendingDuration
    // after the timeout request. This prevents false matches with calls
    // that naturally block for a short period of time,
    // such as HAL write() and read().
    //
    constexpr Duration kPendingDuration = 1000ms;
    for (const auto& pending : analysis.pendingRequests) {
        // If the pending tid is the same as the timeout tid, the problem is identified.
        if (pending->tid == timeout->tid) {
            pendingExact.emplace_back(pending);
            continue;
        }

        // If the pending request was scheduled within the time limit.
        if (pending->scheduled - timeout->scheduled < kPendingDuration) {
            pendingPossible.emplace_back(pending);
        }
    }

    std::string& description = analysis.description;
    if (!pendingExact.empty()) {
        const auto& request = pendingExact.front();
        const bool hal = isRequestFromHal(request);

        if (hal) {
            description = std::string("Blocked directly due to HAL call: ")
                .append(request->toString());
            analysis.suspectTid = request->tid;
        }
    }
    if (description.empty() && !pendingPossible.empty()) {
        for (const auto& request : pendingPossible) {
            const bool hal = isRequestFromHal(request);
            if (hal) {
                // The first blocked call is the most likely one.
                // Recent calls might be temporarily blocked
                // calls such as write() or read(), depending on kPendingDuration.
                description = std::string("Blocked possibly due to HAL call: ")
                    .append(request->toString());
                analysis.suspectTid = request->tid;
            }
        }
    }
    return analysis;
}

std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
    constexpr size_t kEstimatedPendingRequests = 8;  // approx 128 byte alloc.
    std::vector<std::shared_ptr<const Request>> pendingRequests;
    pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock.

    // following are internally locked calls, which add to our local pendingRequests.
    mMonitorThread.copyRequests(pendingRequests);
    mNoTimeoutMap.copyRequests(pendingRequests);

    // Sort in order of scheduled time.
    std::sort(pendingRequests.begin(), pendingRequests.end(),
        [](const std::shared_ptr<const Request>& r1,
           const std::shared_ptr<const Request>& r2) {
               return r1->scheduled < r2->scheduled;
           });
    return pendingRequests;
}

std::string TimerThread::pendingToString() const {
    return requestsToString(getPendingRequests());
}

std::string TimerThread::retiredToString(size_t n) const {
    std::vector<std::shared_ptr<const Request>> retiredRequests;
    mRetiredQueue.copyRequests(retiredRequests, n);

    // Dump to string
    return requestsToString(retiredRequests);
}

std::string TimerThread::timeoutToString(size_t n) const {
    std::vector<std::shared_ptr<const Request>> timeoutRequests;
    mTimeoutQueue.copyRequests(timeoutRequests, n);

    // Dump to string
    return requestsToString(timeoutRequests);
}

std::string TimerThread::Request::toString() const {
    const auto scheduledString = formatTime(scheduled);
    const auto deadlineString = formatTime(deadline);
    return std::string(tag)
        .append(" scheduled ").append(scheduledString)
        .append(" deadline ").append(timeSuffix(scheduledString, deadlineString))
        .append(" tid ").append(std::to_string(tid));
}
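
// The resulting string has the shape (exact values depend on formatTime() and
// timeSuffix(), which are defined elsewhere):
//     <tag> scheduled <scheduled-time> deadline <deadline-suffix> tid <tid>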

void TimerThread::RequestQueue::add(std::shared_ptr<const Request> request) {
    std::lock_guard lg(mRQMutex);
    mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request));
    if (mRequestQueue.size() > mRequestQueueMax) {
        mRequestQueue.pop_front();
    }
}

void TimerThread::RequestQueue::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests, size_t n) const {
    std::lock_guard lg(mRQMutex);
    const size_t size = mRequestQueue.size();
    size_t i = n >= size ? 0 : size - n;
    for (; i < size; ++i) {
        const auto &[time, request] = mRequestQueue[i];
        requests.emplace_back(request);
    }
}
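
// Example of the "last n" semantics above: with 10 requests in the queue and
// n == 3, only indices 7..9 (the three most recently added requests) are
// copied, oldest of those first.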

TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
    std::lock_guard lg(mNTMutex);
    // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1).
    // This need not be under a lock, but we do so anyhow.
    const Handle handle = getUniqueHandle_l();
    mMap[handle] = request;
    return handle;
}

std::shared_ptr<const TimerThread::Request> TimerThread::NoTimeoutMap::remove(Handle handle) {
    std::lock_guard lg(mNTMutex);
    auto it = mMap.find(handle);
    if (it == mMap.end()) return {};
    auto request = it->second;
    mMap.erase(it);
    return request;
}

void TimerThread::NoTimeoutMap::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests) const {
    std::lock_guard lg(mNTMutex);
    for (const auto &[handle, request] : mMap) {
        requests.emplace_back(request);
    }
}

TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
        : mTimeoutQueue(timeoutQueue)
        , mThread([this] { threadFunc(); }) {
    pthread_setname_np(mThread.native_handle(), "TimerThread");
    pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO);
}

TimerThread::MonitorThread::~MonitorThread() {
    {
        std::lock_guard _l(mMutex);
        mShouldExit = true;
        mCond.notify_all();
    }
    mThread.join();
}

void TimerThread::MonitorThread::threadFunc() {
    std::unique_lock _l(mMutex);
    ::android::base::ScopedLockAssertion lock_assertion(mMutex);
    while (!mShouldExit) {
        Handle nextDeadline = INVALID_HANDLE;
        Handle now = INVALID_HANDLE;
        if (!mMonitorRequests.empty()) {
            nextDeadline = mMonitorRequests.begin()->first;
            now = std::chrono::steady_clock::now();
            if (nextDeadline < now) {
                auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                // The deadline has expired, handle the request.
                auto secondChanceDuration = node.mapped().first->secondChanceDuration;
                if (secondChanceDuration.count() != 0) {
                    // We now apply the second chance duration to find the clock
                    // monotonic second deadline.  The unique key is then the
                    // pair<second_deadline, first_deadline>.
                    //
                    // The second chance prevents a false timeout should there be
                    // any clock monotonic advancement during suspend.
                    auto newHandle = now + secondChanceDuration;
                    ALOGD("%s: TimeCheck second chance applied for %s",
                            __func__, node.mapped().first->tag.c_str()); // Should be a rare event.
                    mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
                            std::make_pair(newHandle, nextDeadline),
                            std::move(node.mapped()));
                    // Increment the second chance counter.
                    mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
                } else {
                    {
                        _l.unlock();
                        // We add the Request to the timeout queue early so that it can be dumped out.
                        mTimeoutQueue.add(std::move(node.mapped().first));
                        node.mapped().second(nextDeadline);
                        // Caution: we don't hold the lock when we call the TimerCallback,
                        // but this is the timeout case!  We will crash soon,
                        // maybe before returning.
                        // Anything left over is released here outside the lock.
                    }
                    // Reacquire the lock - if something was added, we loop immediately to check.
                    _l.lock();
                }
                // Always process expiring monitor requests first.
                continue;
            }
        }
        // Now process any second chance requests.
        if (!mSecondChanceRequests.empty()) {
            Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
            if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
            if (secondDeadline < now) {
                auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
                {
                    _l.unlock();
                    // We add the Request to the timeout queue early so that it can be dumped out.
                    mTimeoutQueue.add(std::move(node.mapped().first));
                    const Handle originalHandle = node.key().second;
                    node.mapped().second(originalHandle);
                    // Caution: we don't hold the lock when we call the TimerCallback.
                    // This is a benign issue - we permit concurrent operations
                    // while in the callback to the MonitorQueue.
                    //
                    // Anything left over is released here outside the lock.
                }
                // Reacquire the lock - if something was added, we loop immediately to check.
                _l.lock();
                continue;
            }
            // Update the deadline.
            if (nextDeadline == INVALID_HANDLE) {
                nextDeadline = secondDeadline;
            } else {
                nextDeadline = std::min(nextDeadline, secondDeadline);
            }
        }
        if (nextDeadline != INVALID_HANDLE) {
            mCond.wait_until(_l, nextDeadline);
        } else {
            mCond.wait(_l);
        }
    }
}
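
// Second chance timeline sketch (D and S are illustrative durations):
//     t0          : scheduleTask() with timeoutDuration D and secondChanceDuration S.
//     t0 + D      : first deadline observed expired; the request is not fired but
//                   moved to mSecondChanceRequests under the key
//                   {now + S, first deadline}, and mSecondChanceCount is bumped.
//     ~t0 + D + S : if still present (not cancelled), the request is added to the
//                   timeout queue and the TimerCallback runs with the original
//                   (first-deadline) handle.
// If S is zero, the callback runs at t0 + D with no second chance.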

TimerThread::Handle TimerThread::MonitorThread::add(
        std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
    std::lock_guard _l(mMutex);
    const Handle handle = getUniqueHandle_l(timeout);
    mMonitorRequests.emplace_hint(mMonitorRequests.end(),
            handle, std::make_pair(std::move(request), std::move(func)));
    mCond.notify_all();
    return handle;
}

std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
    std::pair<std::shared_ptr<const Request>, TimerCallback> data;
    std::unique_lock ul(mMutex);
    ::android::base::ScopedLockAssertion lock_assertion(mMutex);
    if (const auto it = mMonitorRequests.find(handle);
        it != mMonitorRequests.end()) {
        data = std::move(it->second);
        mMonitorRequests.erase(it);
        ul.unlock();  // manually release the lock here so func (data.second)
                      // is released outside of the lock.
        return data.first;  // request
    }

    // This check is O(N), but since the second chance requests are ordered
    // in terms of earliest expiration time, we would expect better than average results.
    for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
        if (it->first.second == handle) {
            data = std::move(it->second);
            mSecondChanceRequests.erase(it);
            ul.unlock();  // manually release the lock here so func (data.second)
                          // is released outside of the lock.
            return data.first; // request
        }
    }
    return {};
}

void TimerThread::MonitorThread::copyRequests(
        std::vector<std::shared_ptr<const Request>>& requests) const {
    std::lock_guard lg(mMutex);
    for (const auto &[deadline, monitorpair] : mMonitorRequests) {
        requests.emplace_back(monitorpair.first);
    }
    // We combine the second map with the first map - this is
    // everything that is pending on the monitor thread.
    // The second map will be older than the first map, so this
    // is in order.
    for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
        requests.emplace_back(monitorpair.first);
    }
}

}  // namespace android::mediautils