• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "TimeCheck"
18 
19 #include <optional>
20 
21 #include <android-base/logging.h>
22 #include <audio_utils/clock.h>
23 #include <mediautils/EventLog.h>
24 #include <mediautils/FixedString.h>
25 #include <mediautils/MethodStatistics.h>
26 #include <mediautils/TimeCheck.h>
27 #include <utils/Log.h>
28 #include "debuggerd/handler.h"
29 
30 namespace android::mediautils {
31 
32 /**
33  * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point.
34  */
formatTime(std::chrono::system_clock::time_point t)35 std::string formatTime(std::chrono::system_clock::time_point t) {
36     auto time_string = audio_utils_time_string_from_ns(
37             std::chrono::nanoseconds(t.time_since_epoch()).count());
38 
39     // The time string is 19 characters (including null termination).
40     // Example: "03-27 16:47:06.187"
41     //           MM DD HH MM SS MS
42     // We offset by 6 to get HH:MM:SS.MSc
43     //
44     return time_string.time + 6; // offset to remove month/day.
45 }
46 
47 /**
48  * Finds the end of the common time prefix.
49  *
50  * This is as an option to remove the common time prefix to avoid
51  * unnecessary duplicated strings.
52  *
53  * \param time1 a time string
54  * \param time2 a time string
55  * \return      the position where the common time prefix ends. For abbreviated
56  *              printing of time2, offset the character pointer by this position.
57  */
commonTimePrefixPosition(std::string_view time1,std::string_view time2)58 static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) {
59     const size_t endPos = std::min(time1.size(), time2.size());
60     size_t i;
61 
62     // Find location of the first mismatch between strings
63     for (i = 0; ; ++i) {
64         if (i == endPos) {
65             return i; // strings match completely to the length of one of the strings.
66         }
67         if (time1[i] != time2[i]) {
68             break;
69         }
70         if (time1[i] == '\0') {
71             return i; // "printed" strings match completely.  No need to check further.
72         }
73     }
74 
75     // Go backwards until we find a delimeter or space.
76     for (; i > 0
77            && isdigit(time1[i]) // still a number
78            && time1[i - 1] != ' '
79          ; --i) {
80     }
81     return i;
82 }
83 
84 /**
85  * Returns the unique suffix of time2 that isn't present in time1.
86  *
87  * If time2 is identical to time1, then an empty string_view is returned.
88  * This method is used to elide the common prefix when printing times.
89  */
timeSuffix(std::string_view time1,std::string_view time2)90 std::string_view timeSuffix(std::string_view time1, std::string_view time2) {
91     const size_t pos = commonTimePrefixPosition(time1, time2);
92     return time2.substr(pos);
93 }
94 
95 // Audio HAL server pids vector used to generate audio HAL processes tombstone
96 // when audioserver watchdog triggers.
97 // We use a lockless storage to avoid potential deadlocks in the context of watchdog
98 // trigger.
99 // Protection again simultaneous writes is not needed given one update takes place
100 // during AudioFlinger construction and other comes necessarily later once the IAudioFlinger
101 // interface is available.
102 // The use of an atomic index just guaranties that current vector is fully initialized
103 // when read.
104 /* static */
accessAudioHalPids(std::vector<pid_t> * pids,bool update)105 void TimeCheck::accessAudioHalPids(std::vector<pid_t>* pids, bool update) {
106     static constexpr int kNumAudioHalPidsVectors = 3;
107     static std::vector<pid_t> audioHalPids[kNumAudioHalPidsVectors];
108     static std::atomic<unsigned> curAudioHalPids = 0;
109 
110     if (update) {
111         audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids;
112     } else {
113         *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors];
114     }
115 }
116 
117 /* static */
setAudioHalPids(const std::vector<pid_t> & pids)118 void TimeCheck::setAudioHalPids(const std::vector<pid_t>& pids) {
119     accessAudioHalPids(&(const_cast<std::vector<pid_t>&>(pids)), true);
120 }
121 
122 /* static */
getAudioHalPids()123 std::vector<pid_t> TimeCheck::getAudioHalPids() {
124     std::vector<pid_t> pids;
125     accessAudioHalPids(&pids, false);
126     return pids;
127 }
128 
129 /* static */
getTimeCheckThread()130 TimerThread& TimeCheck::getTimeCheckThread() {
131     static TimerThread sTimeCheckThread{};
132     return sTimeCheckThread;
133 }
134 
135 /* static */
toString()136 std::string TimeCheck::toString() {
137     // note pending and retired are individually locked for maximum concurrency,
138     // snapshot is not instantaneous at a single time.
139     return getTimeCheckThread().toString();
140 }
141 
TimeCheck(std::string_view tag,OnTimerFunc && onTimer,Duration requestedTimeoutDuration,Duration secondChanceDuration,bool crashOnTimeout)142 TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration,
143         Duration secondChanceDuration, bool crashOnTimeout)
144     : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>(
145             tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration,
146             secondChanceDuration, std::chrono::system_clock::now(), gettid()) }
147     , mTimerHandle(requestedTimeoutDuration.count() == 0
148               /* for TimeCheck we don't consider a non-zero secondChanceDuration here */
149               ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag)
150               : getTimeCheckThread().scheduleTask(
151                       mTimeCheckHandler->tag,
152                       // Pass in all the arguments by value to this task for safety.
153                       // The thread could call the callback before the constructor is finished.
154                       // The destructor is not blocked on callback.
__anon8f39a8ae0102(TimerThread::Handle timerHandle) 155                       [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) {
156                           timeCheckHandler->onTimeout(timerHandle);
157                       },
158                       requestedTimeoutDuration,
159                       secondChanceDuration)) {}
160 
~TimeCheck()161 TimeCheck::~TimeCheck() {
162     if (mTimeCheckHandler) {
163         mTimeCheckHandler->onCancel(mTimerHandle);
164     }
165 }
166 
167 /* static */
analyzeTimeouts(float requestedTimeoutMs,float elapsedSteadyMs,float elapsedSystemMs)168 std::string TimeCheck::analyzeTimeouts(
169         float requestedTimeoutMs, float elapsedSteadyMs, float elapsedSystemMs) {
170     // Track any OS clock issues with suspend.
171     // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if
172     // a suspend occurs; however, we always expect the timeout ms should always be slightly
173     // less than the elapsed steady ms regardless of whether a suspend occurs or not.
174 
175     std::string s("Timeout ms ");
176     s.append(std::to_string(requestedTimeoutMs))
177         .append(" elapsed steady ms ").append(std::to_string(elapsedSteadyMs))
178         .append(" elapsed system ms ").append(std::to_string(elapsedSystemMs));
179 
180     // Is there something unusual?
181     static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f;
182 
183     if (requestedTimeoutMs > elapsedSteadyMs || requestedTimeoutMs > elapsedSystemMs) {
184         s.append("\nError: early expiration - "
185                 "requestedTimeoutMs should be less than elapsed time");
186     }
187 
188     if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) {
189         s.append("\nWarning: steady time should not advance faster than system time");
190     }
191 
192     // This has been found in suspend stress testing.
193     if (elapsedSteadyMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
194         s.append("\nWarning: steady time significantly exceeds timeout "
195                 "- possible thread stall or aborted suspend");
196     }
197 
198     // This has been found in suspend stress testing.
199     if (elapsedSystemMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
200         s.append("\nInformation: system time significantly exceeds timeout "
201                 "- possible suspend");
202     }
203     return s;
204 }
205 
206 // To avoid any potential race conditions, the timer handle
207 // (expiration = clock steady start + timeout) is passed into the callback.
onCancel(TimerThread::Handle timerHandle) const208 void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const
209 {
210     if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) {
211         const std::chrono::steady_clock::time_point endSteadyTime =
212                 std::chrono::steady_clock::now();
213         const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
214                 endSteadyTime - timerHandle + timeoutDuration).count();
215         // send the elapsed steady time for statistics.
216         onTimer(false /* timeout */, elapsedSteadyMs);
217     }
218 }
219 
220 // To avoid any potential race conditions, the timer handle
221 // (expiration = clock steady start + timeout) is passed into the callback.
onTimeout(TimerThread::Handle timerHandle) const222 void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const
223 {
224     const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now();
225     const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now();
226     // timerHandle incorporates the timeout
227     const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
228             endSteadyTime - (timerHandle - timeoutDuration)).count();
229     const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>(
230             endSystemTime - startSystemTime).count();
231     const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>(
232             timeoutDuration).count();
233     const float secondChanceMs = std::chrono::duration_cast<FloatMs>(
234             secondChanceDuration).count();
235 
236     if (onTimer) {
237         onTimer(true /* timeout */, elapsedSteadyMs);
238     }
239 
240     if (!crashOnTimeout) return;
241 
242     // Generate the TimerThread summary string early before sending signals to the
243     // HAL processes which can affect thread behavior.
244     const std::string summary = getTimeCheckThread().toString(4 /* retiredCount */);
245 
246     // Generate audio HAL processes tombstones and allow time to complete
247     // before forcing restart
248     std::vector<pid_t> pids = TimeCheck::getAudioHalPids();
249     std::string halPids = "HAL pids [ ";
250     if (pids.size() != 0) {
251         for (const auto& pid : pids) {
252             ALOGI("requesting tombstone for pid: %d", pid);
253             halPids.append(std::to_string(pid)).append(" ");
254             sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0});
255         }
256         sleep(1);
257     } else {
258         ALOGI("No HAL process pid available, skipping tombstones");
259     }
260     halPids.append("]");
261 
262     LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str());
263 
264     // Create abort message string - caution: this can be very large.
265     const std::string abortMessage = std::string("TimeCheck timeout for ")
266             .append(tag)
267             .append(" scheduled ").append(formatTime(startSystemTime))
268             .append(" on thread ").append(std::to_string(tid)).append("\n")
269             .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs,
270                     elapsedSteadyMs, elapsedSystemMs)).append("\n")
271             .append(halPids).append("\n")
272             .append(summary);
273 
274     // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h:
275     // Log message text may be truncated to less than an
276     // implementation-specific limit (1023 bytes).
277     //
278     // Here, we send the string through android-base/logging.h LOG()
279     // to avoid the size limitation. LOG(FATAL) does an abort whereas
280     // LOG(FATAL_WITHOUT_ABORT) does not abort.
281 
282     LOG(FATAL) << abortMessage;
283 }
284 
285 // Automatically create a TimeCheck class for a class and method.
286 // This is used for Audio HIDL support.
makeTimeCheckStatsForClassMethod(std::string_view className,std::string_view methodName)287 mediautils::TimeCheck makeTimeCheckStatsForClassMethod(
288         std::string_view className, std::string_view methodName) {
289     std::shared_ptr<MethodStatistics<std::string>> statistics =
290             mediautils::getStatisticsForClass(className);
291     if (!statistics) return {}; // empty TimeCheck.
292     return mediautils::TimeCheck(
293             FixedString62(className).append("::").append(methodName),
294             [ safeMethodName = FixedString30(methodName),
295               stats = std::move(statistics) ]
296             (bool timeout, float elapsedMs) {
297                     if (timeout) {
298                         ; // ignored, there is no timeout value.
299                     } else {
300                         stats->event(safeMethodName.asStringView(), elapsedMs);
301                     }
302             }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */);
303 }
304 
305 }  // namespace android::mediautils
306