/* * Copyright (C) 2021 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "TimerThread" #include #include #include #include #include #include #include #include using namespace std::chrono_literals; namespace android::mediautils { extern std::string formatTime(std::chrono::system_clock::time_point t); extern std::string_view timeSuffix(std::string_view time1, std::string_view time2); TimerThread::Handle TimerThread::scheduleTask( std::string_view tag, TimerCallback&& func, Duration timeoutDuration, Duration secondChanceDuration) { const auto now = std::chrono::system_clock::now(); auto request = std::make_shared(now, now + std::chrono::duration_cast(timeoutDuration), secondChanceDuration, gettid(), tag); return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration); } TimerThread::Handle TimerThread::trackTask(std::string_view tag) { const auto now = std::chrono::system_clock::now(); auto request = std::make_shared(now, now, Duration{} /* secondChanceDuration */, gettid(), tag); return mNoTimeoutMap.add(std::move(request)); } bool TimerThread::cancelTask(Handle handle) { std::shared_ptr request = isNoTimeoutHandle(handle) ? mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle); if (!request) return false; mRetiredQueue.add(std::move(request)); return true; } std::string TimerThread::toString(size_t retiredCount) const { // Note: These request queues are snapshot very close together but // not at "identical" times as we don't use a class-wide lock. std::vector> timeoutRequests; std::vector> retiredRequests; mTimeoutQueue.copyRequests(timeoutRequests); mRetiredQueue.copyRequests(retiredRequests, retiredCount); std::vector> pendingRequests = getPendingRequests(); struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests); std::string analysisSummary; if (!analysis.summary.empty()) { analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]"); } std::string timeoutStack; if (analysis.timeoutTid != -1) { timeoutStack = std::string("\ntimeout(") .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.timeoutTid)).append("]"); } std::string blockedStack; if (analysis.HALBlockedTid != -1) { blockedStack = std::string("\nblocked(") .append(std::to_string(analysis.HALBlockedTid)).append(") callstack [\n") .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]"); } return std::string("now ") .append(formatTime(std::chrono::system_clock::now())) .append("\nsecondChanceCount ") .append(std::to_string(mMonitorThread.getSecondChanceCount())) .append(analysisSummary) .append("\ntimeout [ ") .append(requestsToString(timeoutRequests)) .append(" ]\npending [ ") .append(requestsToString(pendingRequests)) .append(" ]\nretired [ ") .append(requestsToString(retiredRequests)) .append(" ]") .append(timeoutStack) .append(blockedStack); } // A HAL method is where the substring "Hidl" is in the class name. // The tag should look like: ... Hidl ... :: ... // When the audio HAL is updated to AIDL perhaps we will use instead // a global directory of HAL classes. // // See MethodStatistics.cpp: // mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL) // /* static */ bool TimerThread::isRequestFromHal(const std::shared_ptr& request) { const size_t hidlPos = request->tag.asStringView().find("Hidl"); if (hidlPos == std::string::npos) return false; // should be a separator afterwards Hidl which indicates the string was in the class. const size_t separatorPos = request->tag.asStringView().find("::", hidlPos); return separatorPos != std::string::npos; } /* static */ struct TimerThread::Analysis TimerThread::analyzeTimeout( const std::vector>& timeoutRequests, const std::vector>& pendingRequests) { if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say. // for now look at last timeout (in our case, the only timeout) const std::shared_ptr timeout = timeoutRequests.back(); // pending Requests that are problematic. std::vector> pendingExact; std::vector> pendingPossible; // We look at pending requests that were scheduled no later than kPendingDuration // after the timeout request. This prevents false matches with calls // that naturally block for a short period of time // such as HAL write() and read(). // constexpr Duration kPendingDuration = 1000ms; for (const auto& pending : pendingRequests) { // If the pending tid is the same as timeout tid, problem identified. if (pending->tid == timeout->tid) { pendingExact.emplace_back(pending); continue; } // if the pending tid is scheduled within time limit if (pending->scheduled - timeout->scheduled < kPendingDuration) { pendingPossible.emplace_back(pending); } } struct Analysis analysis{}; analysis.timeoutTid = timeout->tid; std::string& summary = analysis.summary; if (!pendingExact.empty()) { const auto& request = pendingExact.front(); const bool hal = isRequestFromHal(request); if (hal) { summary = std::string("Blocked directly due to HAL call: ") .append(request->toString()); } } if (summary.empty() && !pendingPossible.empty()) { for (const auto& request : pendingPossible) { const bool hal = isRequestFromHal(request); if (hal) { // The first blocked call is the most likely one. // Recent calls might be temporarily blocked // calls such as write() or read() depending on kDuration. summary = std::string("Blocked possibly due to HAL call: ") .append(request->toString()); analysis.HALBlockedTid = request->tid; } } } return analysis; } std::vector> TimerThread::getPendingRequests() const { constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc. std::vector> pendingRequests; pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock. // following are internally locked calls, which add to our local pendingRequests. mMonitorThread.copyRequests(pendingRequests); mNoTimeoutMap.copyRequests(pendingRequests); // Sort in order of scheduled time. std::sort(pendingRequests.begin(), pendingRequests.end(), [](const std::shared_ptr& r1, const std::shared_ptr& r2) { return r1->scheduled < r2->scheduled; }); return pendingRequests; } std::string TimerThread::pendingToString() const { return requestsToString(getPendingRequests()); } std::string TimerThread::retiredToString(size_t n) const { std::vector> retiredRequests; mRetiredQueue.copyRequests(retiredRequests, n); // Dump to string return requestsToString(retiredRequests); } std::string TimerThread::timeoutToString(size_t n) const { std::vector> timeoutRequests; mTimeoutQueue.copyRequests(timeoutRequests, n); // Dump to string return requestsToString(timeoutRequests); } std::string TimerThread::Request::toString() const { const auto scheduledString = formatTime(scheduled); const auto deadlineString = formatTime(deadline); return std::string(tag) .append(" scheduled ").append(scheduledString) .append(" deadline ").append(timeSuffix(scheduledString, deadlineString)) .append(" tid ").append(std::to_string(tid)); } void TimerThread::RequestQueue::add(std::shared_ptr request) { std::lock_guard lg(mRQMutex); mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request)); if (mRequestQueue.size() > mRequestQueueMax) { mRequestQueue.pop_front(); } } void TimerThread::RequestQueue::copyRequests( std::vector>& requests, size_t n) const { std::lock_guard lg(mRQMutex); const size_t size = mRequestQueue.size(); size_t i = n >= size ? 0 : size - n; for (; i < size; ++i) { const auto &[time, request] = mRequestQueue[i]; requests.emplace_back(request); } } TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr request) { std::lock_guard lg(mNTMutex); // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1), // This need not be under a lock, but we do so anyhow. const Handle handle = getUniqueHandle_l(); mMap[handle] = request; return handle; } std::shared_ptr TimerThread::NoTimeoutMap::remove(Handle handle) { std::lock_guard lg(mNTMutex); auto it = mMap.find(handle); if (it == mMap.end()) return {}; auto request = it->second; mMap.erase(it); return request; } void TimerThread::NoTimeoutMap::copyRequests( std::vector>& requests) const { std::lock_guard lg(mNTMutex); for (const auto &[handle, request] : mMap) { requests.emplace_back(request); } } TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue) : mTimeoutQueue(timeoutQueue) , mThread([this] { threadFunc(); }) { pthread_setname_np(mThread.native_handle(), "TimerThread"); pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO); } TimerThread::MonitorThread::~MonitorThread() { { std::lock_guard _l(mMutex); mShouldExit = true; mCond.notify_all(); } mThread.join(); } void TimerThread::MonitorThread::threadFunc() { std::unique_lock _l(mMutex); while (!mShouldExit) { Handle nextDeadline = INVALID_HANDLE; Handle now = INVALID_HANDLE; if (!mMonitorRequests.empty()) { nextDeadline = mMonitorRequests.begin()->first; now = std::chrono::steady_clock::now(); if (nextDeadline < now) { auto node = mMonitorRequests.extract(mMonitorRequests.begin()); // Deadline has expired, handle the request. auto secondChanceDuration = node.mapped().first->secondChanceDuration; if (secondChanceDuration.count() != 0) { // We now apply the second chance duration to find the clock // monotonic second deadline. The unique key is then the // pair. // // The second chance prevents a false timeout should there be // any clock monotonic advancement during suspend. auto newHandle = now + secondChanceDuration; ALOGD("%s: TimeCheck second chance applied for %s", __func__, node.mapped().first->tag.c_str()); // should be rare event. mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(), std::make_pair(newHandle, nextDeadline), std::move(node.mapped())); // increment second chance counter. mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed); } else { { _l.unlock(); // We add Request to retired queue early so that it can be dumped out. mTimeoutQueue.add(std::move(node.mapped().first)); node.mapped().second(nextDeadline); // Caution: we don't hold lock when we call TimerCallback, // but this is the timeout case! We will crash soon, // maybe before returning. // anything left over is released here outside lock. } // reacquire the lock - if something was added, we loop immediately to check. _l.lock(); } // always process expiring monitor requests first. continue; } } // now process any second chance requests. if (!mSecondChanceRequests.empty()) { Handle secondDeadline = mSecondChanceRequests.begin()->first.first; if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now(); if (secondDeadline < now) { auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin()); { _l.unlock(); // We add Request to retired queue early so that it can be dumped out. mTimeoutQueue.add(std::move(node.mapped().first)); const Handle originalHandle = node.key().second; node.mapped().second(originalHandle); // Caution: we don't hold lock when we call TimerCallback. // This is benign issue - we permit concurrent operations // while in the callback to the MonitorQueue. // // Anything left over is released here outside lock. } // reacquire the lock - if something was added, we loop immediately to check. _l.lock(); continue; } // update the deadline. if (nextDeadline == INVALID_HANDLE) { nextDeadline = secondDeadline; } else { nextDeadline = std::min(nextDeadline, secondDeadline); } } if (nextDeadline != INVALID_HANDLE) { mCond.wait_until(_l, nextDeadline); } else { mCond.wait(_l); } } } TimerThread::Handle TimerThread::MonitorThread::add( std::shared_ptr request, TimerCallback&& func, Duration timeout) { std::lock_guard _l(mMutex); const Handle handle = getUniqueHandle_l(timeout); mMonitorRequests.emplace_hint(mMonitorRequests.end(), handle, std::make_pair(std::move(request), std::move(func))); mCond.notify_all(); return handle; } std::shared_ptr TimerThread::MonitorThread::remove(Handle handle) { std::pair, TimerCallback> data; std::unique_lock ul(mMutex); if (const auto it = mMonitorRequests.find(handle); it != mMonitorRequests.end()) { data = std::move(it->second); mMonitorRequests.erase(it); ul.unlock(); // manually release lock here so func (data.second) // is released outside of lock. return data.first; // request } // this check is O(N), but since the second chance requests are ordered // in terms of earliest expiration time, we would expect better than average results. for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) { if (it->first.second == handle) { data = std::move(it->second); mSecondChanceRequests.erase(it); ul.unlock(); // manually release lock here so func (data.second) // is released outside of lock. return data.first; // request } } return {}; } void TimerThread::MonitorThread::copyRequests( std::vector>& requests) const { std::lock_guard lg(mMutex); for (const auto &[deadline, monitorpair] : mMonitorRequests) { requests.emplace_back(monitorpair.first); } // we combine the second map with the first map - this is // everything that is pending on the monitor thread. // The second map will be older than the first map so this // is in order. for (const auto &[deadline, monitorpair] : mSecondChanceRequests) { requests.emplace_back(monitorpair.first); } } } // namespace android::mediautils