1 /* 2 * Copyright (c) 2020, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_ 18 #define CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_ 19 20 #include "LooperWrapper.h" 21 #include "ProcDiskStatsCollector.h" 22 #include "ProcStatCollector.h" 23 #include "UidStatsCollector.h" 24 #include "WatchdogServiceHelper.h" 25 26 #include <WatchdogProperties.sysprop.h> 27 #include <aidl/android/automotive/watchdog/internal/PackageIoOveruseStats.h> 28 #include <aidl/android/automotive/watchdog/internal/ResourceStats.h> 29 #include <aidl/android/automotive/watchdog/internal/UserState.h> 30 #include <android-base/chrono_utils.h> 31 #include <android-base/result.h> 32 #include <cutils/multiuser.h> 33 #include <gtest/gtest_prod.h> 34 #include <utils/Errors.h> 35 #include <utils/Looper.h> 36 #include <utils/Mutex.h> 37 #include <utils/RefBase.h> 38 #include <utils/String16.h> 39 #include <utils/StrongPointer.h> 40 #include <utils/Vector.h> 41 42 #include <time.h> 43 44 #include <string> 45 #include <thread> // NOLINT(build/c++11) 46 #include <unordered_set> 47 48 namespace android { 49 namespace automotive { 50 namespace watchdog { 51 52 // Forward declaration for testing use only. 53 namespace internal { 54 55 class WatchdogPerfServicePeer; 56 57 } // namespace internal 58 59 constexpr std::chrono::seconds kDefaultPostSystemEventDurationSec = 30s; 60 constexpr std::chrono::seconds kDefaultWakeUpEventDurationSec = 30s; 61 constexpr std::chrono::seconds kDefaultUserSwitchTimeoutSec = 30s; 62 constexpr std::chrono::nanoseconds kPrevUnsentResourceStatsMaxDurationNs = 10min; 63 constexpr const char* kStartCustomCollectionFlag = "--start_perf"; 64 constexpr const char* kEndCustomCollectionFlag = "--stop_perf"; 65 constexpr const char* kIntervalFlag = "--interval"; 66 constexpr const char* kMaxDurationFlag = "--max_duration"; 67 constexpr const char* kFilterPackagesFlag = "--filter_packages"; 68 69 enum SystemState { 70 NORMAL_MODE = 0, 71 GARAGE_MODE = 1, 72 }; 73 74 /** 75 * DataProcessor defines methods that must be implemented in order to process the data collected 76 * by |WatchdogPerfService|. 77 */ 78 class DataProcessorInterface : virtual public android::RefBase { 79 public: DataProcessorInterface()80 DataProcessorInterface() {} ~DataProcessorInterface()81 virtual ~DataProcessorInterface() {} 82 // Returns the name of the data processor. 83 virtual std::string name() const = 0; 84 // Callback to initialize the data processor. 85 virtual android::base::Result<void> init() = 0; 86 // Callback to terminate the data processor. 87 virtual void terminate() = 0; 88 // Callback to perform actions (such as clearing stats from previous system startup events) 89 // before starting boot-time or wake-up collections. 90 virtual android::base::Result<void> onSystemStartup() = 0; 91 // Callback to perform actions once CarWatchdogService is registered. 92 virtual void onCarWatchdogServiceRegistered() = 0; 93 // Callback to process the data collected during boot-time. 94 virtual android::base::Result<void> onBoottimeCollection( 95 time_t time, const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 96 const android::wp<ProcStatCollectorInterface>& procStatCollector, 97 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 98 // Callback to process the data collected during a wake-up event. 99 virtual android::base::Result<void> onWakeUpCollection( 100 time_t time, const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 101 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 102 // Callback to process the data collected periodically post boot complete. 103 virtual android::base::Result<void> onPeriodicCollection( 104 time_t time, SystemState systemState, 105 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 106 const android::wp<ProcStatCollectorInterface>& procStatCollector, 107 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 108 // Callback to process the data collected during user switch. 109 virtual android::base::Result<void> onUserSwitchCollection( 110 time_t time, userid_t from, userid_t to, 111 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 112 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 113 114 /** 115 * Callback to process the data collected on custom collection and filter the results only to 116 * the specified |filterPackages|. 117 */ 118 virtual android::base::Result<void> onCustomCollection( 119 time_t time, SystemState systemState, 120 const std::unordered_set<std::string>& filterPackages, 121 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 122 const android::wp<ProcStatCollectorInterface>& procStatCollector, 123 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 124 /** 125 * Callback to periodically monitor the collected data and trigger the given |alertHandler| 126 * on detecting resource overuse. 127 */ 128 virtual android::base::Result<void> onPeriodicMonitor( 129 time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector, 130 const std::function<void()>& alertHandler) = 0; 131 // Callback to dump the boot-time collected and periodically collected data. 132 virtual android::base::Result<void> onDump(int fd) const = 0; 133 /** 134 * Callback to dump the custom collected data. When fd == -1, clear the custom collection cache. 135 */ 136 virtual android::base::Result<void> onCustomCollectionDump(int fd) = 0; 137 }; 138 139 enum EventType { 140 // WatchdogPerfService's state. 141 INIT = 0, 142 TERMINATED, 143 144 // Collection events. 145 BOOT_TIME_COLLECTION, 146 PERIODIC_COLLECTION, 147 USER_SWITCH_COLLECTION, 148 WAKE_UP_COLLECTION, 149 CUSTOM_COLLECTION, 150 151 // Monitor event. 152 PERIODIC_MONITOR, 153 154 LAST_EVENT, 155 }; 156 157 enum SwitchMessage { 158 /** 159 * On receiving this message, collect the last boot-time record and start periodic collection 160 * and monitor. 161 */ 162 END_BOOTTIME_COLLECTION = EventType::LAST_EVENT + 1, 163 164 /** 165 * On receiving this message, collect the last user switch record and start periodic collection 166 * and monitor. 167 */ 168 END_USER_SWITCH_COLLECTION, 169 170 /** 171 * On receiving this message, collect the last wake up record and start periodic collection and 172 * monitor. 173 */ 174 END_WAKE_UP_COLLECTION, 175 176 /** 177 * On receiving this message, ends custom collection, discard collected data and start periodic 178 * collection and monitor. 179 */ 180 END_CUSTOM_COLLECTION, 181 182 LAST_SWITCH_MSG, 183 }; 184 185 enum TaskMessage { 186 // On receiving this message, send the cached resource stats to CarWatchdogService. 187 SEND_RESOURCE_STATS = SwitchMessage::LAST_SWITCH_MSG + 1, 188 }; 189 190 /** 191 * WatchdogPerfServiceInterface collects performance data during boot-time, user switch, system wake 192 * up and periodically post system events. It exposes APIs that the main thread and binder service 193 * can call to start a collection, switch the collection type, and generate collection dumps. 194 */ 195 class WatchdogPerfServiceInterface : virtual public MessageHandler { 196 public: 197 // Register a data processor to process the data collected by |WatchdogPerfService|. 198 virtual android::base::Result<void> registerDataProcessor( 199 android::sp<DataProcessorInterface> processor) = 0; 200 /** 201 * Starts the boot-time collection in the looper handler on a new thread and returns 202 * immediately. Must be called only once. Otherwise, returns an error. 203 */ 204 virtual android::base::Result<void> start() = 0; 205 // Terminates the collection thread and returns. 206 virtual void terminate() = 0; 207 // Sets the system state. 208 virtual void setSystemState(SystemState systemState) = 0; 209 // Handles unsent resource stats. 210 virtual void onCarWatchdogServiceRegistered() = 0; 211 // Ends the boot-time collection by switching to periodic collection after the post event 212 // duration. 213 virtual android::base::Result<void> onBootFinished() = 0; 214 // Starts and ends the user switch collection depending on the user states received. 215 virtual android::base::Result<void> onUserStateChange( 216 userid_t userId, 217 const aidl::android::automotive::watchdog::internal::UserState& userState) = 0; 218 // Starts wake-up collection. Any running collection is stopped, except for custom collections. 219 virtual android::base::Result<void> onSuspendExit() = 0; 220 // Called on shutdown enter, suspend enter and hibernation enter. 221 virtual android::base::Result<void> onShutdownEnter() = 0; 222 223 /** 224 * Depending on the arguments, it either: 225 * 1. Starts a custom collection. 226 * 2. Or ends the current custom collection and dumps the collected data. 227 * Returns any error observed during the dump generation. 228 */ 229 virtual android::base::Result<void> onCustomCollection(int fd, const char** args, 230 uint32_t numArgs) = 0; 231 // Generates a dump from the boot-time and periodic collection events. 232 virtual android::base::Result<void> onDump(int fd) const = 0; 233 // Dumps the help text. 234 virtual bool dumpHelpText(int fd) const = 0; 235 }; 236 237 class WatchdogPerfService final : public WatchdogPerfServiceInterface { 238 public: WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface> & watchdogServiceHelper)239 WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper) : 240 mPostSystemEventDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 241 std::chrono::seconds(sysprop::postSystemEventDuration().value_or( 242 kDefaultPostSystemEventDurationSec.count())))), 243 mWakeUpDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 244 std::chrono::seconds(sysprop::wakeUpEventDuration().value_or( 245 kDefaultWakeUpEventDurationSec.count())))), 246 mUserSwitchTimeoutNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 247 std::chrono::seconds(sysprop::userSwitchTimeout().value_or( 248 kDefaultUserSwitchTimeoutSec.count())))), 249 mHandlerLooper(android::sp<LooperWrapper>::make()), 250 mSystemState(NORMAL_MODE), 251 mBoottimeCollection({}), 252 mPeriodicCollection({}), 253 mUserSwitchCollection({}), 254 mCustomCollection({}), 255 mPeriodicMonitor({}), 256 mUnsentResourceStats({}), 257 mCurrCollectionEvent(EventType::INIT), 258 mUidStatsCollector(android::sp<UidStatsCollector>::make()), 259 mProcStatCollector(android::sp<ProcStatCollector>::make()), 260 mProcDiskStatsCollector(android::sp<ProcDiskStatsCollector>::make()), 261 mDataProcessors({}), 262 mWatchdogServiceHelper(watchdogServiceHelper) {} 263 264 android::base::Result<void> registerDataProcessor( 265 android::sp<DataProcessorInterface> processor) override; 266 267 android::base::Result<void> start() override; 268 269 void terminate() override; 270 271 void setSystemState(SystemState systemState) override; 272 273 void onCarWatchdogServiceRegistered() override; 274 275 android::base::Result<void> onBootFinished() override; 276 277 android::base::Result<void> onUserStateChange( 278 userid_t userId, 279 const aidl::android::automotive::watchdog::internal::UserState& userState) override; 280 281 android::base::Result<void> onSuspendExit() override; 282 283 android::base::Result<void> onShutdownEnter() override; 284 285 android::base::Result<void> onCustomCollection(int fd, const char** args, 286 uint32_t numArgs) override; 287 288 android::base::Result<void> onDump(int fd) const override; 289 290 bool dumpHelpText(int fd) const override; 291 292 private: 293 struct EventMetadata { 294 // Collection or monitor event. 295 EventType eventType = EventType::LAST_EVENT; 296 // Interval between subsequent events. 297 std::chrono::nanoseconds pollingIntervalNs = 0ns; 298 // Used to calculate the uptime for next event. 299 nsecs_t lastPollUptimeNs = 0; 300 // Filter the results only to the specified packages. 301 std::unordered_set<std::string> filterPackages; 302 303 std::string toString() const; 304 }; 305 306 struct UserSwitchEventMetadata : WatchdogPerfService::EventMetadata { 307 // User id of user being switched from. 308 userid_t from = 0; 309 // User id of user being switched to. 310 userid_t to = 0; 311 }; 312 313 // Dumps the collectors' status when they are disabled. 314 android::base::Result<void> dumpCollectorsStatusLocked(int fd) const; 315 316 /** 317 * Starts a custom collection on the looper handler, temporarily stops the periodic collection 318 * (won't discard the collected data), and returns immediately. Returns any error observed 319 * during this process. 320 * The custom collection happens once every |interval| seconds. When the |maxDuration| is 321 * reached, the looper receives a message to end the collection, discards the collected data, 322 * and starts the periodic collection. This is needed to ensure the custom collection doesn't 323 * run forever when a subsequent |endCustomCollection| call is not received. 324 * When |kFilterPackagesFlag| value specified, the results are filtered only to the specified 325 * package names. 326 */ 327 android::base::Result<void> startCustomCollection( 328 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration, 329 const std::unordered_set<std::string>& filterPackages); 330 331 /** 332 * Ends the current custom collection, generates a dump, sends a looper message to start the 333 * periodic collection, and returns immediately. Returns an error when there is no custom 334 * collection running or when a dump couldn't be generated from the custom collection. 335 */ 336 android::base::Result<void> endCustomCollection(int fd); 337 338 // Start a user switch collection. 339 android::base::Result<void> startUserSwitchCollection(); 340 341 // Switch to periodic collection and periodic monitor. 342 void switchToPeriodicLocked(bool startNow); 343 344 // Handles the messages received by the lopper. 345 void handleMessage(const Message& message) override; 346 347 // Processes the collection events received by |handleMessage|. 348 android::base::Result<void> processCollectionEvent(EventMetadata* metadata); 349 350 // Collects/processes the performance data for the current collection event. 351 android::base::Result<void> collectLocked(EventMetadata* metadata); 352 353 // Processes the monitor events received by |handleMessage|. 354 android::base::Result<void> processMonitorEvent(EventMetadata* metadata); 355 356 // Sends the unsent resource stats. 357 android::base::Result<void> sendResourceStats(); 358 359 // Notifies all registered data processors that either boot-time or wake-up collection will 360 // start. Individual implementations of data processors may clear stats collected during 361 // previous system startup events. 362 android::base::Result<void> notifySystemStartUpLocked(); 363 364 // Caches resource stats that have not been sent to CarWatchdogService. 365 void cacheUnsentResourceStatsLocked( 366 aidl::android::automotive::watchdog::internal::ResourceStats resourceStats); 367 368 /** 369 * Returns the metadata for the current collection based on |mCurrCollectionEvent|. Returns 370 * nullptr on invalid collection event. 371 */ 372 EventMetadata* getCurrentCollectionMetadataLocked(); 373 374 // Duration to extend a system event collection after the final signal is received. 375 std::chrono::nanoseconds mPostSystemEventDurationNs; 376 377 // Duration of the wake-up collection event. 378 std::chrono::nanoseconds mWakeUpDurationNs; 379 380 // Timeout duration for user switch collection in case final signal isn't received. 381 std::chrono::nanoseconds mUserSwitchTimeoutNs; 382 383 // Thread on which the actual collection happens. 384 std::thread mCollectionThread; 385 386 // Makes sure only one collection is running at any given time. 387 mutable Mutex mMutex; 388 389 // Handler looper to execute different collection events on the collection thread. 390 android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex); 391 392 // Current system state. 393 SystemState mSystemState GUARDED_BY(mMutex); 394 395 // Info for the |EventType::BOOT_TIME_COLLECTION| collection event. 396 EventMetadata mBoottimeCollection GUARDED_BY(mMutex); 397 398 // Info for the |EventType::PERIODIC_COLLECTION| collection event. 399 EventMetadata mPeriodicCollection GUARDED_BY(mMutex); 400 401 // Info for the |EventType::USER_SWITCH_COLLECTION| collection event. 402 UserSwitchEventMetadata mUserSwitchCollection GUARDED_BY(mMutex); 403 404 // Info for the |EventType::WAKE_UP_COLLECTION| collection event. 405 EventMetadata mWakeUpCollection GUARDED_BY(mMutex); 406 407 // Info for the |EventType::CUSTOM_COLLECTION| collection event. The info is cleared at the end 408 // of every custom collection. 409 EventMetadata mCustomCollection GUARDED_BY(mMutex); 410 411 // Info for the |EventType::PERIODIC_MONITOR| monitor event. 412 EventMetadata mPeriodicMonitor GUARDED_BY(mMutex); 413 414 // Cache of resource stats that have not been sent to CarWatchdogService. 415 std::vector<std::tuple<nsecs_t, aidl::android::automotive::watchdog::internal::ResourceStats>> 416 mUnsentResourceStats GUARDED_BY(mMutex); 417 418 // Tracks either the WatchdogPerfService's state or current collection event. Updated on 419 // |start|, |onBootFinished|, |onUserStateChange|, |startCustomCollection|, 420 // |endCustomCollection|, and |terminate|. 421 EventType mCurrCollectionEvent GUARDED_BY(mMutex); 422 423 // Collector for UID process and I/O stats. 424 android::sp<UidStatsCollectorInterface> mUidStatsCollector GUARDED_BY(mMutex); 425 426 // Collector/parser for `/proc/stat`. 427 android::sp<ProcStatCollectorInterface> mProcStatCollector GUARDED_BY(mMutex); 428 429 // Collector/parser for `/proc/diskstats` file. 430 android::sp<ProcDiskStatsCollectorInterface> mProcDiskStatsCollector GUARDED_BY(mMutex); 431 432 // Data processors for the collected performance data. 433 std::vector<android::sp<DataProcessorInterface>> mDataProcessors GUARDED_BY(mMutex); 434 435 // Helper to communicate with the CarWatchdogService. 436 android::sp<WatchdogServiceHelperInterface> mWatchdogServiceHelper GUARDED_BY(mMutex); 437 438 // For unit tests. 439 friend class internal::WatchdogPerfServicePeer; 440 FRIEND_TEST(WatchdogPerfServiceTest, TestServiceStartAndTerminate); 441 }; 442 443 } // namespace watchdog 444 } // namespace automotive 445 } // namespace android 446 447 #endif // CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_ 448