• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogPerfService.h"
21 
22 #include <WatchdogProperties.sysprop.h>
23 #include <android-base/file.h>
24 #include <android-base/parseint.h>
25 #include <android-base/stringprintf.h>
26 #include <android-base/strings.h>
27 #include <log/log.h>
28 #include <processgroup/sched_policy.h>
29 
30 #include <pthread.h>
31 
32 #include <iterator>
33 #include <vector>
34 
35 namespace android {
36 namespace automotive {
37 namespace watchdog {
38 
39 using ::android::sp;
40 using ::android::String16;
41 using ::android::String8;
42 using ::android::automotive::watchdog::internal::PowerCycle;
43 using ::android::base::Error;
44 using ::android::base::Join;
45 using ::android::base::ParseUint;
46 using ::android::base::Result;
47 using ::android::base::Split;
48 using ::android::base::StringAppendF;
49 using ::android::base::StringPrintf;
50 using ::android::base::WriteStringToFd;
51 
52 namespace {
53 
54 // Minimum required collection interval between subsequent collections.
55 const std::chrono::nanoseconds kMinEventInterval = 1s;
56 const std::chrono::seconds kDefaultBoottimeCollectionInterval = 1s;
57 const std::chrono::seconds kDefaultPeriodicCollectionInterval = 20s;
58 const std::chrono::seconds kDefaultPeriodicMonitorInterval = 5s;
59 const std::chrono::nanoseconds kCustomCollectionInterval = 10s;
60 const std::chrono::nanoseconds kCustomCollectionDuration = 30min;
61 
62 constexpr const char* kServiceName = "WatchdogPerfService";
63 static const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";  // NOLINT
64 constexpr const char* kHelpText =
65         "\n%s dump options:\n"
66         "%s: Starts custom performance data collection. Customize the collection behavior with "
67         "the following optional arguments:\n"
68         "\t%s <seconds>: Modifies the collection interval. Default behavior is to collect once "
69         "every %lld seconds.\n"
70         "\t%s <seconds>: Modifies the maximum collection duration. Default behavior is to collect "
71         "until %ld minutes before automatically stopping the custom collection and discarding "
72         "the collected data.\n"
73         "\t%s <package name>,<package name>,...: Comma-separated value containing package names. "
74         "When provided, the results are filtered only to the provided package names. Default "
75         "behavior is to list the results for the top N packages.\n"
76         "%s: Stops custom performance data collection and generates a dump of "
77         "the collection report.\n\n"
78         "When no options are specified, the carwatchdog report contains the performance data "
79         "collected during boot-time and over the last few minutes before the report generation.\n";
80 
parseSecondsFlag(const Vector<String16> & args,size_t pos)81 Result<std::chrono::seconds> parseSecondsFlag(const Vector<String16>& args, size_t pos) {
82     if (args.size() <= pos) {
83         return Error() << "Value not provided";
84     }
85     uint64_t value;
86     if (std::string strValue = std::string(String8(args[pos]).string());
87         !ParseUint(strValue, &value)) {
88         return Error() << "Invalid value " << strValue << ", must be an integer";
89     }
90     return std::chrono::seconds(value);
91 }
92 
toString(std::variant<EventType,SwitchMessage> what)93 constexpr const char* toString(std::variant<EventType, SwitchMessage> what) {
94     return std::visit(
95             [&](const auto& v) -> const char* {
96                 switch (static_cast<int>(v)) {
97                     case EventType::INIT:
98                         return "INIT";
99                     case EventType::TERMINATED:
100                         return "TERMINATED";
101                     case EventType::BOOT_TIME_COLLECTION:
102                         return "BOOT_TIME_COLLECTION";
103                     case EventType::PERIODIC_COLLECTION:
104                         return "PERIODIC_COLLECTION";
105                     case EventType::CUSTOM_COLLECTION:
106                         return "CUSTOM_COLLECTION";
107                     case EventType::PERIODIC_MONITOR:
108                         return "PERIODIC_MONITOR";
109                     case EventType::LAST_EVENT:
110                         return "LAST_EVENT";
111                     case SwitchMessage::END_BOOTTIME_COLLECTION:
112                         return "END_BOOTTIME_COLLECTION";
113                     case SwitchMessage::END_CUSTOM_COLLECTION:
114                         return "END_CUSTOM_COLLECTION";
115                     default:
116                         return "INVALID_EVENT_OR_SWITCH_MESSAGE";
117                 }
118             },
119             what);
120 }
121 
toString(SystemState systemState)122 constexpr const char* toString(SystemState systemState) {
123     switch (systemState) {
124         case SystemState::NORMAL_MODE:
125             return "NORMAL_MODE";
126         case SystemState::GARAGE_MODE:
127             return "GARAGE_MODE";
128         default:
129             return "UNKNOWN MODE";
130     }
131 }
132 
133 }  // namespace
134 
toString() const135 std::string WatchdogPerfService::EventMetadata::toString() const {
136     std::string buffer;
137     const auto intervalInSecs = std::chrono::duration_cast<std::chrono::seconds>(interval).count();
138     StringAppendF(&buffer, "Event interval: %lld second%s\n", intervalInSecs,
139                   ((intervalInSecs > 1) ? "s" : ""));
140     if (!filterPackages.empty()) {
141         std::vector<std::string> packages(filterPackages.begin(), filterPackages.end());
142         StringAppendF(&buffer, "Filtered results to packages: %s\n", Join(packages, ", ").c_str());
143     }
144     return buffer;
145 }
146 
registerDataProcessor(sp<IDataProcessorInterface> processor)147 Result<void> WatchdogPerfService::registerDataProcessor(sp<IDataProcessorInterface> processor) {
148     if (processor == nullptr) {
149         return Error() << "Must provide a valid data processor";
150     }
151     if (const auto result = processor->init(); !result.ok()) {
152         return Error() << "Failed to initialize " << processor->name().c_str() << ": "
153                        << result.error().message();
154     }
155     Mutex::Autolock lock(mMutex);
156     mDataProcessors.push_back(processor);
157     if (DEBUG) {
158         ALOGD("Successfully registered %s to %s", processor->name().c_str(), kServiceName);
159     }
160     return {};
161 }
162 
start()163 Result<void> WatchdogPerfService::start() {
164     {
165         Mutex::Autolock lock(mMutex);
166         if (mCurrCollectionEvent != EventType::INIT || mCollectionThread.joinable()) {
167             return Error(INVALID_OPERATION) << "Cannot start " << kServiceName << " more than once";
168         }
169         std::chrono::nanoseconds boottimeCollectionInterval =
170                 std::chrono::duration_cast<std::chrono::nanoseconds>(
171                         std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or(
172                                 kDefaultBoottimeCollectionInterval.count())));
173         std::chrono::nanoseconds periodicCollectionInterval =
174                 std::chrono::duration_cast<std::chrono::nanoseconds>(
175                         std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
176                                 kDefaultPeriodicCollectionInterval.count())));
177         std::chrono::nanoseconds periodicMonitorInterval =
178                 std::chrono::duration_cast<std::chrono::nanoseconds>(
179                         std::chrono::seconds(sysprop::periodicMonitorInterval().value_or(
180                                 kDefaultPeriodicMonitorInterval.count())));
181         mBoottimeCollection = {
182                 .eventType = EventType::BOOT_TIME_COLLECTION,
183                 .interval = boottimeCollectionInterval,
184                 .lastUptime = 0,
185         };
186         mPeriodicCollection = {
187                 .eventType = EventType::PERIODIC_COLLECTION,
188                 .interval = periodicCollectionInterval,
189                 .lastUptime = 0,
190         };
191         mPeriodicMonitor = {
192                 .eventType = EventType::PERIODIC_MONITOR,
193                 .interval = periodicMonitorInterval,
194                 .lastUptime = 0,
195         };
196         if (mDataProcessors.empty()) {
197             ALOGE("Terminating %s: No data processor is registered", kServiceName);
198             mCurrCollectionEvent = EventType::TERMINATED;
199             return Error() << "No data processor is registered";
200         }
201     }
202 
203     mCollectionThread = std::thread([&]() {
204         {
205             Mutex::Autolock lock(mMutex);
206             if (EventType expected = EventType::INIT; mCurrCollectionEvent != expected) {
207                 ALOGE("Skipping performance data collection as the current collection event "
208                       "%s != %s",
209                       toString(mCurrCollectionEvent), toString(expected));
210                 return;
211             }
212             mCurrCollectionEvent = EventType::BOOT_TIME_COLLECTION;
213             mBoottimeCollection.lastUptime = mHandlerLooper->now();
214             mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
215             mHandlerLooper->sendMessage(this, EventType::BOOT_TIME_COLLECTION);
216         }
217         if (set_sched_policy(0, SP_BACKGROUND) != 0) {
218             ALOGW("Failed to set background scheduling priority to %s thread", kServiceName);
219         }
220         if (int result = pthread_setname_np(pthread_self(), "WatchdogPerfSvc"); result != 0) {
221             ALOGE("Failed to set %s thread name: %d", kServiceName, result);
222         }
223         ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
224         bool isCollectionActive = true;
225         /*
226          * Loop until the collection is not active -- performance collection runs on this thread in
227          * a handler.
228          */
229         while (isCollectionActive) {
230             mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
231             Mutex::Autolock lock(mMutex);
232             isCollectionActive = mCurrCollectionEvent != EventType::TERMINATED;
233         }
234     });
235     return {};
236 }
237 
terminate()238 void WatchdogPerfService::terminate() {
239     {
240         Mutex::Autolock lock(mMutex);
241         if (mCurrCollectionEvent == EventType::TERMINATED) {
242             ALOGE("%s was terminated already", kServiceName);
243             return;
244         }
245         ALOGE("Terminating %s as carwatchdog is terminating", kServiceName);
246         if (mCurrCollectionEvent != EventType::INIT) {
247             /*
248              * Looper runs only after EventType::TNIT has completed so remove looper messages
249              * and wake the looper only when the current collection has changed from INIT.
250              */
251             mHandlerLooper->removeMessages(this);
252             mHandlerLooper->wake();
253         }
254         for (const auto& processor : mDataProcessors) {
255             processor->terminate();
256         }
257         mCurrCollectionEvent = EventType::TERMINATED;
258     }
259     if (mCollectionThread.joinable()) {
260         mCollectionThread.join();
261         if (DEBUG) {
262             ALOGD("%s collection thread terminated", kServiceName);
263         }
264     }
265 }
266 
setSystemState(SystemState systemState)267 void WatchdogPerfService::setSystemState(SystemState systemState) {
268     Mutex::Autolock lock(mMutex);
269     if (mSystemState != systemState) {
270         ALOGI("%s switching from %s to %s", kServiceName, toString(mSystemState),
271               toString(systemState));
272     }
273     mSystemState = systemState;
274 }
275 
onBootFinished()276 Result<void> WatchdogPerfService::onBootFinished() {
277     Mutex::Autolock lock(mMutex);
278     if (EventType expected = EventType::BOOT_TIME_COLLECTION; mCurrCollectionEvent != expected) {
279         /*
280          * This case happens when either the WatchdogPerfService has prematurely terminated before
281          * boot complete notification is received or multiple boot complete notifications are
282          * received. In either case don't return error as this will lead to runtime exception and
283          * cause system to boot loop.
284          */
285         ALOGE("Current performance data collection event %s != %s", toString(mCurrCollectionEvent),
286               toString(expected));
287         return {};
288     }
289     mBoottimeCollection.lastUptime = mHandlerLooper->now();
290     mHandlerLooper->removeMessages(this);
291     mHandlerLooper->sendMessage(this, SwitchMessage::END_BOOTTIME_COLLECTION);
292     if (DEBUG) {
293         ALOGD("Boot-time event finished");
294     }
295     return {};
296 }
297 
onCustomCollection(int fd,const Vector<String16> & args)298 Result<void> WatchdogPerfService::onCustomCollection(int fd, const Vector<String16>& args) {
299     if (args.empty()) {
300         return Error(BAD_VALUE) << "No custom collection dump arguments";
301     }
302 
303     if (args[0] == String16(kStartCustomCollectionFlag)) {
304         if (args.size() > 7) {
305             return Error(BAD_VALUE) << "Number of arguments to start custom performance data "
306                                     << "collection cannot exceed 7";
307         }
308         std::chrono::nanoseconds interval = kCustomCollectionInterval;
309         std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
310         std::unordered_set<std::string> filterPackages;
311         for (size_t i = 1; i < args.size(); ++i) {
312             if (args[i] == String16(kIntervalFlag)) {
313                 const auto& result = parseSecondsFlag(args, i + 1);
314                 if (!result.ok()) {
315                     return Error(BAD_VALUE)
316                             << "Failed to parse " << kIntervalFlag << ": " << result.error();
317                 }
318                 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
319                 ++i;
320                 continue;
321             }
322             if (args[i] == String16(kMaxDurationFlag)) {
323                 const auto& result = parseSecondsFlag(args, i + 1);
324                 if (!result.ok()) {
325                     return Error(BAD_VALUE)
326                             << "Failed to parse " << kMaxDurationFlag << ": " << result.error();
327                 }
328                 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
329                 ++i;
330                 continue;
331             }
332             if (args[i] == String16(kFilterPackagesFlag)) {
333                 if (args.size() < i + 1) {
334                     return Error(BAD_VALUE)
335                             << "Must provide value for '" << kFilterPackagesFlag << "' flag";
336                 }
337                 std::vector<std::string> packages =
338                         Split(std::string(String8(args[i + 1]).string()), ",");
339                 std::copy(packages.begin(), packages.end(),
340                           std::inserter(filterPackages, filterPackages.end()));
341                 ++i;
342                 continue;
343             }
344             ALOGW("Unknown flag %s provided to start custom performance data collection",
345                   String8(args[i]).string());
346             return Error(BAD_VALUE) << "Unknown flag " << String8(args[i]).string()
347                                     << " provided to start custom performance data collection";
348         }
349         if (const auto& result = startCustomCollection(interval, maxDuration, filterPackages);
350             !result.ok()) {
351             WriteStringToFd(result.error().message(), fd);
352             return result;
353         }
354         return {};
355     }
356 
357     if (args[0] == String16(kEndCustomCollectionFlag)) {
358         if (args.size() != 1) {
359             ALOGW("Number of arguments to stop custom performance data collection cannot exceed 1. "
360                   "Stopping the data collection.");
361             WriteStringToFd("Number of arguments to stop custom performance data collection "
362                             "cannot exceed 1. Stopping the data collection.",
363                             fd);
364         }
365         return endCustomCollection(fd);
366     }
367 
368     return Error(BAD_VALUE) << "Custom perf collection dump arguments start neither with "
369                             << kStartCustomCollectionFlag << " nor with "
370                             << kEndCustomCollectionFlag << " flags";
371 }
372 
onDump(int fd)373 Result<void> WatchdogPerfService::onDump(int fd) {
374     Mutex::Autolock lock(mMutex);
375     if (mCurrCollectionEvent == EventType::TERMINATED) {
376         ALOGW("%s not active. Dumping cached data", kServiceName);
377         if (!WriteStringToFd(StringPrintf("%s not active. Dumping cached data.", kServiceName),
378                              fd)) {
379             return Error(FAILED_TRANSACTION) << "Failed to write " << kServiceName << " status";
380         }
381     }
382 
383     if (const auto& result = dumpCollectorsStatusLocked(fd); !result.ok()) {
384         return Error(FAILED_TRANSACTION) << result.error();
385     }
386 
387     if (!WriteStringToFd(StringPrintf("\n%s%s report:\n%sBoot-time collection information:\n%s\n",
388                                       kDumpMajorDelimiter.c_str(), kServiceName,
389                                       kDumpMajorDelimiter.c_str(), std::string(33, '=').c_str()),
390                          fd) ||
391         !WriteStringToFd(mBoottimeCollection.toString(), fd) ||
392         !WriteStringToFd(StringPrintf("\nPeriodic collection information:\n%s\n",
393                                       std::string(32, '=').c_str()),
394                          fd) ||
395         !WriteStringToFd(mPeriodicCollection.toString(), fd)) {
396         return Error(FAILED_TRANSACTION)
397                 << "Failed to dump the boot-time and periodic collection reports.";
398     }
399 
400     for (const auto& processor : mDataProcessors) {
401         if (const auto result = processor->onDump(fd); !result.ok()) {
402             return result;
403         }
404     }
405 
406     WriteStringToFd(kDumpMajorDelimiter, fd);
407     return {};
408 }
409 
dumpHelpText(int fd)410 bool WatchdogPerfService::dumpHelpText(int fd) {
411     return WriteStringToFd(StringPrintf(kHelpText, kServiceName, kStartCustomCollectionFlag,
412                                         kIntervalFlag,
413                                         std::chrono::duration_cast<std::chrono::seconds>(
414                                                 kCustomCollectionInterval)
415                                                 .count(),
416                                         kMaxDurationFlag,
417                                         std::chrono::duration_cast<std::chrono::minutes>(
418                                                 kCustomCollectionDuration)
419                                                 .count(),
420                                         kFilterPackagesFlag, kEndCustomCollectionFlag),
421                            fd);
422 }
423 
dumpCollectorsStatusLocked(int fd)424 Result<void> WatchdogPerfService::dumpCollectorsStatusLocked(int fd) {
425     if (!mUidIoStats->enabled() &&
426         !WriteStringToFd(StringPrintf("UidIoStats collector failed to access the file %s",
427                                       mUidIoStats->filePath().c_str()),
428                          fd)) {
429         return Error() << "Failed to write UidIoStats collector status";
430     }
431     if (!mProcStat->enabled() &&
432         !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
433                                       mProcStat->filePath().c_str()),
434                          fd)) {
435         return Error() << "Failed to write ProcStat collector status";
436     }
437     if (!mProcPidStat->enabled() &&
438         !WriteStringToFd(StringPrintf("ProcPidStat collector failed to access the directory %s",
439                                       mProcPidStat->dirPath().c_str()),
440                          fd)) {
441         return Error() << "Failed to write ProcPidStat collector status";
442     }
443     return {};
444 }
445 
startCustomCollection(std::chrono::nanoseconds interval,std::chrono::nanoseconds maxDuration,const std::unordered_set<std::string> & filterPackages)446 Result<void> WatchdogPerfService::startCustomCollection(
447         std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
448         const std::unordered_set<std::string>& filterPackages) {
449     if (interval < kMinEventInterval || maxDuration < kMinEventInterval) {
450         return Error(INVALID_OPERATION)
451                 << "Collection interval and maximum duration must be >= "
452                 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinEventInterval).count()
453                 << " milliseconds.";
454     }
455     Mutex::Autolock lock(mMutex);
456     if (EventType expected = EventType::PERIODIC_COLLECTION; mCurrCollectionEvent != expected) {
457         return Error(INVALID_OPERATION)
458                 << "Cannot start a custom collection when the current collection event "
459                 << toString(mCurrCollectionEvent) << " != " << toString(expected)
460                 << " collection event";
461     }
462 
463     mCustomCollection = {
464             .eventType = EventType::CUSTOM_COLLECTION,
465             .interval = interval,
466             .lastUptime = mHandlerLooper->now(),
467             .filterPackages = filterPackages,
468     };
469 
470     mHandlerLooper->removeMessages(this);
471     nsecs_t uptime = mHandlerLooper->now() + maxDuration.count();
472     mHandlerLooper->sendMessageAtTime(uptime, this, SwitchMessage::END_CUSTOM_COLLECTION);
473     mCurrCollectionEvent = EventType::CUSTOM_COLLECTION;
474     mHandlerLooper->sendMessage(this, EventType::CUSTOM_COLLECTION);
475     ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
476     return {};
477 }
478 
endCustomCollection(int fd)479 Result<void> WatchdogPerfService::endCustomCollection(int fd) {
480     Mutex::Autolock lock(mMutex);
481     if (mCurrCollectionEvent != EventType::CUSTOM_COLLECTION) {
482         return Error(INVALID_OPERATION) << "No custom collection is running";
483     }
484 
485     mHandlerLooper->removeMessages(this);
486     mHandlerLooper->sendMessage(this, SwitchMessage::END_CUSTOM_COLLECTION);
487 
488     if (const auto result = dumpCollectorsStatusLocked(fd); !result.ok()) {
489         return Error(FAILED_TRANSACTION) << result.error();
490     }
491 
492     if (!WriteStringToFd(StringPrintf("%sPerformance data report for custom collection:\n%s",
493                                       kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
494                          fd) ||
495         !WriteStringToFd(mCustomCollection.toString(), fd)) {
496         return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
497     }
498 
499     for (const auto& processor : mDataProcessors) {
500         if (const auto result = processor->onCustomCollectionDump(fd); !result.ok()) {
501             return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
502                            << " collection: " << result.error();
503         }
504     }
505 
506     if (DEBUG) {
507         ALOGD("Custom event finished");
508     }
509     WriteStringToFd(kDumpMajorDelimiter, fd);
510     return {};
511 }
512 
handleMessage(const Message & message)513 void WatchdogPerfService::handleMessage(const Message& message) {
514     Result<void> result;
515 
516     auto switchToPeriodicLocked = [&](bool startNow) {
517         mHandlerLooper->removeMessages(this);
518         mCurrCollectionEvent = EventType::PERIODIC_COLLECTION;
519         mPeriodicCollection.lastUptime = mHandlerLooper->now();
520         if (startNow) {
521             mHandlerLooper->sendMessage(this, EventType::PERIODIC_COLLECTION);
522         } else {
523             mPeriodicCollection.lastUptime += mPeriodicCollection.interval.count();
524             mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastUptime, this,
525                                               EventType::PERIODIC_COLLECTION);
526         }
527         mPeriodicMonitor.lastUptime = mHandlerLooper->now() + mPeriodicMonitor.interval.count();
528         mHandlerLooper->sendMessageAtTime(mPeriodicMonitor.lastUptime, this,
529                                           EventType::PERIODIC_MONITOR);
530         ALOGI("Switching to %s and %s", toString(mCurrCollectionEvent),
531               toString(EventType::PERIODIC_MONITOR));
532     };
533 
534     switch (message.what) {
535         case static_cast<int>(EventType::BOOT_TIME_COLLECTION):
536             result = processCollectionEvent(&mBoottimeCollection);
537             break;
538         case static_cast<int>(SwitchMessage::END_BOOTTIME_COLLECTION):
539             if (result = processCollectionEvent(&mBoottimeCollection); result.ok()) {
540                 Mutex::Autolock lock(mMutex);
541                 switchToPeriodicLocked(/*startNow=*/false);
542             }
543             break;
544         case static_cast<int>(EventType::PERIODIC_COLLECTION):
545             result = processCollectionEvent(&mPeriodicCollection);
546             break;
547         case static_cast<int>(EventType::CUSTOM_COLLECTION):
548             result = processCollectionEvent(&mCustomCollection);
549             break;
550         case static_cast<int>(EventType::PERIODIC_MONITOR):
551             result = processMonitorEvent(&mPeriodicMonitor);
552             break;
553         case static_cast<int>(SwitchMessage::END_CUSTOM_COLLECTION): {
554             Mutex::Autolock lock(mMutex);
555             if (EventType expected = EventType::CUSTOM_COLLECTION;
556                 mCurrCollectionEvent != expected) {
557                 ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
558                       toString(mCurrCollectionEvent), toString(expected));
559                 return;
560             }
561             mCustomCollection = {};
562             for (const auto& processor : mDataProcessors) {
563                 /*
564                  * Clear custom collection cache on the data processors when the custom collection
565                  * ends.
566                  */
567                 processor->onCustomCollectionDump(-1);
568             }
569             switchToPeriodicLocked(/*startNow=*/true);
570             return;
571         }
572         default:
573             result = Error() << "Unknown message: " << message.what;
574     }
575 
576     if (!result.ok()) {
577         Mutex::Autolock lock(mMutex);
578         ALOGE("Terminating %s: %s", kServiceName, result.error().message().c_str());
579         /*
580          * DO NOT CALL terminate() as it tries to join the collection thread but this code is
581          * executed on the collection thread. Thus it will result in a deadlock.
582          */
583         mCurrCollectionEvent = EventType::TERMINATED;
584         mHandlerLooper->removeMessages(this);
585         mHandlerLooper->wake();
586     }
587 }
588 
processCollectionEvent(WatchdogPerfService::EventMetadata * metadata)589 Result<void> WatchdogPerfService::processCollectionEvent(
590         WatchdogPerfService::EventMetadata* metadata) {
591     Mutex::Autolock lock(mMutex);
592     /*
593      * Messages sent to the looper are intrinsically racy such that a message from the previous
594      * collection event may land in the looper after the current collection has already begun. Thus
595      * verify the current collection event before starting the collection.
596      */
597     if (mCurrCollectionEvent != metadata->eventType) {
598         ALOGW("Skipping %s event on collection event %s", toString(metadata->eventType),
599               toString(mCurrCollectionEvent));
600         return {};
601     }
602     if (DEBUG) {
603         ALOGD("Processing %s collection event", toString(metadata->eventType));
604     }
605     if (metadata->interval < kMinEventInterval) {
606         return Error()
607                 << "Collection interval of "
608                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
609                 << " seconds for " << toString(metadata->eventType)
610                 << " collection cannot be less than "
611                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
612                 << " seconds";
613     }
614     if (const auto result = collectLocked(metadata); !result.ok()) {
615         return Error() << toString(metadata->eventType) << " collection failed: " << result.error();
616     }
617     metadata->lastUptime += metadata->interval.count();
618     mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
619     return {};
620 }
621 
collectLocked(WatchdogPerfService::EventMetadata * metadata)622 Result<void> WatchdogPerfService::collectLocked(WatchdogPerfService::EventMetadata* metadata) {
623     if (!mUidIoStats->enabled() && !mProcStat->enabled() && !mProcPidStat->enabled()) {
624         return Error() << "No collectors enabled";
625     }
626 
627     time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
628 
629     if (mUidIoStats->enabled()) {
630         if (const auto result = mUidIoStats->collect(); !result.ok()) {
631             return Error() << "Failed to collect per-uid I/O usage: " << result.error();
632         }
633     }
634 
635     if (mProcStat->enabled()) {
636         if (const auto result = mProcStat->collect(); !result.ok()) {
637             return Error() << "Failed to collect proc stats: " << result.error();
638         }
639     }
640 
641     if (mProcPidStat->enabled()) {
642         if (const auto result = mProcPidStat->collect(); !result.ok()) {
643             return Error() << "Failed to collect process stats: " << result.error();
644         }
645     }
646 
647     for (const auto& processor : mDataProcessors) {
648         Result<void> result;
649         switch (mCurrCollectionEvent) {
650             case EventType::BOOT_TIME_COLLECTION:
651                 result = processor->onBoottimeCollection(now, mUidIoStats, mProcStat, mProcPidStat);
652                 break;
653             case EventType::PERIODIC_COLLECTION:
654                 result = processor->onPeriodicCollection(now, mSystemState, mUidIoStats, mProcStat,
655                                                          mProcPidStat);
656                 break;
657             case EventType::CUSTOM_COLLECTION:
658                 result = processor->onCustomCollection(now, mSystemState, metadata->filterPackages,
659                                                        mUidIoStats, mProcStat, mProcPidStat);
660                 break;
661             default:
662                 result = Error() << "Invalid collection event " << toString(mCurrCollectionEvent);
663         }
664         if (!result.ok()) {
665             return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
666                            << " collection: " << result.error();
667         }
668     }
669 
670     return {};
671 }
672 
processMonitorEvent(WatchdogPerfService::EventMetadata * metadata)673 Result<void> WatchdogPerfService::processMonitorEvent(
674         WatchdogPerfService::EventMetadata* metadata) {
675     if (metadata->eventType != static_cast<int>(EventType::PERIODIC_MONITOR)) {
676         return Error() << "Invalid monitor event " << toString(metadata->eventType);
677     }
678     if (DEBUG) {
679         ALOGD("Processing %s monitor event", toString(metadata->eventType));
680     }
681     if (metadata->interval < kMinEventInterval) {
682         return Error()
683                 << "Monitor interval of "
684                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
685                 << " seconds for " << toString(metadata->eventType) << " event cannot be less than "
686                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
687                 << " seconds";
688     }
689     Mutex::Autolock lock(mMutex);
690     if (!mProcDiskStats->enabled()) {
691         return Error() << "Cannot access proc disk stats for monitoring";
692     }
693     time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
694     if (const auto result = mProcDiskStats->collect(); !result.ok()) {
695         return Error() << "Failed to collect disk stats: " << result.error();
696     }
697     auto* currCollectionMetadata = currCollectionMetadataLocked();
698     if (currCollectionMetadata == nullptr) {
699         return Error() << "No metadata available for current collection event: "
700                        << toString(mCurrCollectionEvent);
701     }
702     bool requestedCollection = false;
703     const auto requestCollection = [&]() mutable {
704         if (requestedCollection) {
705             return;
706         }
707         const nsecs_t prevUptime =
708                 currCollectionMetadata->lastUptime - currCollectionMetadata->interval.count();
709         nsecs_t uptime = mHandlerLooper->now();
710         if (const auto delta = std::abs(uptime - prevUptime); delta < kMinEventInterval.count()) {
711             return;
712         }
713         currCollectionMetadata->lastUptime = uptime;
714         mHandlerLooper->removeMessages(this, currCollectionMetadata->eventType);
715         mHandlerLooper->sendMessage(this, currCollectionMetadata->eventType);
716         requestedCollection = true;
717     };
718     for (const auto& processor : mDataProcessors) {
719         if (const auto result =
720                     processor->onPeriodicMonitor(now, mProcDiskStats, requestCollection);
721             !result.ok()) {
722             return Error() << processor->name() << " failed on " << toString(metadata->eventType)
723                            << ": " << result.error();
724         }
725     }
726     metadata->lastUptime += metadata->interval.count();
727     if (metadata->lastUptime == currCollectionMetadata->lastUptime) {
728         /*
729          * If the |PERIODIC_MONITOR| and  *_COLLECTION events overlap, skip the |PERIODIC_MONITOR|
730          * event.
731          */
732         metadata->lastUptime += metadata->interval.count();
733     }
734     mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
735     return {};
736 }
737 
currCollectionMetadataLocked()738 WatchdogPerfService::EventMetadata* WatchdogPerfService::currCollectionMetadataLocked() {
739     switch (mCurrCollectionEvent) {
740         case EventType::BOOT_TIME_COLLECTION:
741             return &mBoottimeCollection;
742         case EventType::PERIODIC_COLLECTION:
743             return &mPeriodicCollection;
744         case EventType::CUSTOM_COLLECTION:
745             return &mCustomCollection;
746         default:
747             return nullptr;
748     }
749 }
750 
751 }  // namespace watchdog
752 }  // namespace automotive
753 }  // namespace android
754