1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <atomic> 20 #include <condition_variable> 21 #include <deque> 22 #include <functional> 23 #include <map> 24 #include <memory> 25 #include <mutex> 26 #include <string> 27 #include <thread> 28 #include <vector> 29 30 #include <android-base/thread_annotations.h> 31 32 #include <mediautils/FixedString.h> 33 34 namespace android::mediautils { 35 36 /** 37 * A thread for deferred execution of tasks, with cancellation. 38 */ 39 class TimerThread { 40 public: 41 // A Handle is a time_point that serves as a unique key to access a queued 42 // request to the TimerThread. 43 using Handle = std::chrono::steady_clock::time_point; 44 45 // Duration is based on steady_clock (typically nanoseconds) 46 // vs the system_clock duration (typically microseconds). 47 using Duration = std::chrono::steady_clock::duration; 48 49 static inline constexpr Handle INVALID_HANDLE = 50 std::chrono::steady_clock::time_point::min(); 51 52 // Handle implementation details: 53 // A Handle represents the timer expiration time based on std::chrono::steady_clock 54 // (clock monotonic). This Handle is computed as now() + timeout. 55 // 56 // The lsb of the Handle time_point is adjusted to indicate whether there is 57 // a timeout action (1) or not (0). 58 // 59 60 template <size_t COUNT> 61 static constexpr bool is_power_of_2_v = COUNT > 0 && (COUNT & (COUNT - 1)) == 0; 62 63 template <size_t COUNT> 64 static constexpr size_t mask_from_count_v = COUNT - 1; 65 66 static constexpr size_t HANDLE_TYPES = 2; 67 // HANDLE_TYPES must be a power of 2. 68 static_assert(is_power_of_2_v<HANDLE_TYPES>); 69 70 // The handle types 71 enum class HANDLE_TYPE : size_t { 72 NO_TIMEOUT = 0, 73 TIMEOUT = 1, 74 }; 75 76 static constexpr size_t HANDLE_TYPE_MASK = mask_from_count_v<HANDLE_TYPES>; 77 78 template <typename T> enum_as_value(T x)79 static constexpr auto enum_as_value(T x) { 80 return static_cast<std::underlying_type_t<T>>(x); 81 } 82 isNoTimeoutHandle(Handle handle)83 static inline bool isNoTimeoutHandle(Handle handle) { 84 return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) == 85 enum_as_value(HANDLE_TYPE::NO_TIMEOUT); 86 } 87 isTimeoutHandle(Handle handle)88 static inline bool isTimeoutHandle(Handle handle) { 89 return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) == 90 enum_as_value(HANDLE_TYPE::TIMEOUT); 91 } 92 93 // Returns a unique Handle that doesn't exist in the container. 94 template <size_t MAX_TYPED_HANDLES, size_t HANDLE_TYPE_AS_VALUE, typename C, typename T> getUniqueHandleForHandleType_l(C container,T timeout)95 static Handle getUniqueHandleForHandleType_l(C container, T timeout) { 96 static_assert(MAX_TYPED_HANDLES > 0 && HANDLE_TYPE_AS_VALUE < MAX_TYPED_HANDLES 97 && is_power_of_2_v<MAX_TYPED_HANDLES>, 98 " handles must be power of two"); 99 100 // Our initial handle is the deadline as computed from steady_clock. 101 auto deadline = std::chrono::steady_clock::now() + timeout; 102 103 // We adjust the lsbs by the minimum increment to have the correct 104 // HANDLE_TYPE in the least significant bits. 105 auto remainder = deadline.time_since_epoch().count() & HANDLE_TYPE_MASK; 106 size_t offset = HANDLE_TYPE_AS_VALUE > remainder ? HANDLE_TYPE_AS_VALUE - remainder : 107 MAX_TYPED_HANDLES + HANDLE_TYPE_AS_VALUE - remainder; 108 deadline += std::chrono::steady_clock::duration(offset); 109 110 // To avoid key collisions, advance the handle by MAX_TYPED_HANDLES (the modulus factor) 111 // until the key is unique. 112 while (container.find(deadline) != container.end()) { 113 deadline += std::chrono::steady_clock::duration(MAX_TYPED_HANDLES); 114 } 115 return deadline; 116 } 117 118 // TimerCallback invoked on timeout or cancel. 119 using TimerCallback = std::function<void(Handle)>; 120 121 /** 122 * Schedules a task to be executed in the future (`timeout` duration from now). 123 * 124 * \param tag string associated with the task. This need not be unique, 125 * as the Handle returned is used for cancelling. 126 * \param func callback function that is invoked at the timeout. 127 * \param timeoutDuration timeout duration which is converted to milliseconds with at 128 * least 45 integer bits. 129 * A timeout of 0 (or negative) means the timer never expires 130 * so func() is never called. These tasks are stored internally 131 * and reported in the toString() until manually cancelled. 132 * \returns a handle that can be used for cancellation. 133 */ 134 Handle scheduleTask( 135 std::string_view tag, TimerCallback&& func, 136 Duration timeoutDuration, Duration secondChanceDuration); 137 138 /** 139 * Tracks a task that shows up on toString() until cancelled. 140 * 141 * \param tag string associated with the task. 142 * \returns a handle that can be used for cancellation. 143 */ 144 Handle trackTask(std::string_view tag); 145 146 /** 147 * Cancels a task previously scheduled with scheduleTask() 148 * or trackTask(). 149 * 150 * \returns true if cancelled. If the task has already executed 151 * or if the handle doesn't exist, this is a no-op 152 * and returns false. 153 */ 154 bool cancelTask(Handle handle); 155 156 struct SnapshotAnalysis; 157 /** 158 * Take a snapshot of the current state of the TimerThread and determine the 159 * potential cause of a deadlock. 160 * \param retiredCount The number of successfully retired calls to capture 161 * (may be many). 162 * \return See below for a description of a SnapShotAnalysis object 163 */ 164 SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const; 165 166 /** 167 * Returns a string representation of the TimerThread queue. 168 * 169 * The queue is dumped in order of scheduling (not deadline). 170 */ 171 std::string pendingToString() const; 172 173 /** 174 * Returns a string representation of the last retired tasks. 175 * 176 * These tasks from trackTask() or scheduleTask() are 177 * cancelled. 178 * 179 * These are ordered when the task was retired. 180 * 181 * \param n is maximum number of tasks to dump. 182 */ 183 std::string retiredToString(size_t n = SIZE_MAX) const; 184 185 186 /** 187 * Returns a string representation of the last timeout tasks. 188 * 189 * These tasks from scheduleTask() which have timed-out. 190 * 191 * These are ordered when the task had timed-out. 192 * 193 * \param n is maximum number of tasks to dump. 194 */ 195 std::string timeoutToString(size_t n = SIZE_MAX) const; 196 197 /** 198 * Dumps a container with SmartPointer<Request> to a string. 199 * 200 * "{ Request1 } { Request2} ...{ RequestN }" 201 */ 202 template <typename T> requestsToString(const T & containerRequests)203 static std::string requestsToString(const T& containerRequests) { 204 std::string s; 205 // append seems to be faster than stringstream. 206 // https://stackoverflow.com/questions/18892281/most-optimized-way-of-concatenation-in-strings 207 for (const auto& request : containerRequests) { 208 s.append("{ ").append(request->toString()).append(" } "); 209 } 210 // If not empty, there's an extra space at the end, so we trim it off. 211 if (!s.empty()) s.pop_back(); 212 return s; 213 } 214 215 // To minimize movement of data, we pass around shared_ptrs to Requests. 216 // These are allocated and deallocated outside of the lock. 217 // TODO(b/243839867) consider options to merge Request with the 218 // TimeCheck::TimeCheckHandler struct. 219 struct Request { RequestRequest220 Request(std::chrono::system_clock::time_point _scheduled, 221 std::chrono::system_clock::time_point _deadline, 222 Duration _secondChanceDuration, 223 pid_t _tid, 224 std::string_view _tag) 225 : scheduled(_scheduled) 226 , deadline(_deadline) 227 , secondChanceDuration(_secondChanceDuration) 228 , tid(_tid) 229 , tag(_tag) 230 {} 231 232 const std::chrono::system_clock::time_point scheduled; 233 const std::chrono::system_clock::time_point deadline; // deadline := scheduled 234 // + timeoutDuration 235 // + secondChanceDuration 236 // if deadline == scheduled, no 237 // timeout, task not executed. 238 Duration secondChanceDuration; 239 const pid_t tid; 240 const FixedString62 tag; 241 std::string toString() const; 242 }; 243 244 245 // SnapshotAnalysis contains info deduced by analysisTimeout(). 246 247 struct SnapshotAnalysis { 248 // If we were unable to determine any applicable thread ids, 249 // we leave their value as INVALID_PID. 250 // Note, we use the linux thread id (not pthread), so its type is pid_t. 251 static constexpr pid_t INVALID_PID = -1; 252 // Description of likely issue and/or blocked method. 253 // Empty if no actionable info. 254 std::string description; 255 // Tid of the (latest) monitored thread which has timed out. 256 // This is the thread which the suspect is deduced with respect to. 257 // Most often, this is the thread which an abort is being triggered 258 // from. 259 pid_t timeoutTid = INVALID_PID; 260 // Tid of the (HAL) thread which has likely halted progress, selected 261 // from pendingRequests. May be the same as timeoutTid, if the timed-out 262 // thread directly called into the HAL. 263 pid_t suspectTid = INVALID_PID; 264 // Number of second chances given by the timer thread 265 size_t secondChanceCount; 266 // List of pending requests 267 std::vector<std::shared_ptr<const Request>> pendingRequests; 268 // List of timed-out requests 269 std::vector<std::shared_ptr<const Request>> timeoutRequests; 270 // List of retired requests 271 std::vector<std::shared_ptr<const Request>> retiredRequests; 272 // mutex deadlock / wait detection information. 273 bool hasMutexCycle = false; 274 std::vector<std::pair<pid_t, std::string>> mutexWaitChain; 275 // Dumps the information contained above as well as additional call 276 // stacks where applicable. 277 std::string toString(bool showTimeoutStack = true) const; 278 }; 279 280 private: 281 // Deque of requests, in order of add(). 282 // This class is thread-safe. 283 class RequestQueue { 284 public: RequestQueue(size_t maxSize)285 explicit RequestQueue(size_t maxSize) 286 : mRequestQueueMax(maxSize) {} 287 288 void add(std::shared_ptr<const Request>); 289 290 // return up to the last "n" requests retired. 291 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests, 292 size_t n = SIZE_MAX) const; 293 294 private: 295 const size_t mRequestQueueMax; 296 mutable std::mutex mRQMutex; 297 std::deque<std::pair<std::chrono::system_clock::time_point, 298 std::shared_ptr<const Request>>> 299 mRequestQueue GUARDED_BY(mRQMutex); 300 }; 301 302 // A storage map of tasks without timeouts. There is no TimerCallback 303 // required, it just tracks the tasks with the tag, scheduled time and the tid. 304 // These tasks show up on a pendingToString() until manually cancelled. 305 class NoTimeoutMap { 306 mutable std::mutex mNTMutex; 307 std::map<Handle, std::shared_ptr<const Request>> mMap GUARDED_BY(mNTMutex); getUniqueHandle_l()308 Handle getUniqueHandle_l() REQUIRES(mNTMutex) { 309 return getUniqueHandleForHandleType_l< 310 HANDLE_TYPES, enum_as_value(HANDLE_TYPE::NO_TIMEOUT)>( 311 mMap, Duration{} /* timeout */); 312 } 313 314 public: 315 bool isValidHandle(Handle handle) const; // lock free 316 Handle add(std::shared_ptr<const Request> request); 317 std::shared_ptr<const Request> remove(Handle handle); 318 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const; 319 }; 320 321 // Monitor thread. 322 // This thread manages shared pointers to Requests and a function to 323 // call on timeout. 324 // This class is thread-safe. 325 class MonitorThread { 326 std::atomic<size_t> mSecondChanceCount{}; 327 mutable std::mutex mMutex; 328 mutable std::condition_variable mCond GUARDED_BY(mMutex); 329 330 // Ordered map of requests based on time of deadline. 331 // 332 std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>> 333 mMonitorRequests GUARDED_BY(mMutex); 334 335 // Due to monotonic/steady clock inaccuracies during suspend, 336 // we allow an additional second chance waiting time to prevent 337 // false removal. 338 339 // This mSecondChanceRequests queue is almost always empty. 340 // Using a pair with the original handle allows lookup and keeps 341 // the Key unique. 342 std::map<std::pair<Handle /* new */, Handle /* original */>, 343 std::pair<std::shared_ptr<const Request>, TimerCallback>> 344 mSecondChanceRequests GUARDED_BY(mMutex); 345 346 RequestQueue& mTimeoutQueue GUARDED_BY(mMutex); // added to when request times out. 347 348 // Worker thread variables 349 bool mShouldExit GUARDED_BY(mMutex) = false; 350 351 // To avoid race with initialization, 352 // mThread should be initialized last as the thread is launched immediately. 353 std::thread mThread; 354 355 void threadFunc(); getUniqueHandle_l(Duration timeout)356 Handle getUniqueHandle_l(Duration timeout) REQUIRES(mMutex) { 357 return getUniqueHandleForHandleType_l< 358 HANDLE_TYPES, enum_as_value(HANDLE_TYPE::TIMEOUT)>( 359 mMonitorRequests, timeout); 360 } 361 362 public: 363 MonitorThread(RequestQueue &timeoutQueue); 364 ~MonitorThread(); 365 366 Handle add(std::shared_ptr<const Request> request, TimerCallback&& func, 367 Duration timeout); 368 std::shared_ptr<const Request> remove(Handle handle); 369 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const; getSecondChanceCount()370 size_t getSecondChanceCount() const { 371 return mSecondChanceCount.load(std::memory_order_relaxed); 372 } 373 }; 374 375 376 // A HAL method is where the substring "Hidl" is in the class name. 377 // The tag should look like: ... Hidl ... :: ... 378 static bool isRequestFromHal(const std::shared_ptr<const Request>& request); 379 380 std::vector<std::shared_ptr<const Request>> getPendingRequests() const; 381 382 static constexpr size_t kRetiredQueueMax = 16; 383 RequestQueue mRetiredQueue{kRetiredQueueMax}; // locked internally 384 385 static constexpr size_t kTimeoutQueueMax = 16; 386 RequestQueue mTimeoutQueue{kTimeoutQueueMax}; // locked internally 387 388 NoTimeoutMap mNoTimeoutMap; // locked internally 389 390 MonitorThread mMonitorThread{mTimeoutQueue}; // This should be initialized last because 391 // the thread is launched immediately. 392 // Locked internally. 393 }; 394 395 } // namespace android::mediautils 396