1 /*
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "WatchdogPerfService.h"
21
22 #include <WatchdogProperties.sysprop.h>
23 #include <android-base/file.h>
24 #include <android-base/parseint.h>
25 #include <android-base/stringprintf.h>
26 #include <android-base/strings.h>
27 #include <log/log.h>
28 #include <processgroup/sched_policy.h>
29
30 #include <pthread.h>
31
32 #include <iterator>
33 #include <vector>
34
35 namespace android {
36 namespace automotive {
37 namespace watchdog {
38
39 using ::android::sp;
40 using ::android::String16;
41 using ::android::String8;
42 using ::android::automotive::watchdog::internal::PowerCycle;
43 using ::android::base::Error;
44 using ::android::base::Join;
45 using ::android::base::ParseUint;
46 using ::android::base::Result;
47 using ::android::base::Split;
48 using ::android::base::StringAppendF;
49 using ::android::base::StringPrintf;
50 using ::android::base::WriteStringToFd;
51
52 namespace {
53
// Smallest interval accepted for any collection or monitor event. The custom collection maximum
// duration is validated against the same lower bound.
constexpr std::chrono::nanoseconds kMinEventInterval = std::chrono::seconds(1);
// Fallback intervals used when the corresponding system property has no value.
constexpr std::chrono::seconds kDefaultBoottimeCollectionInterval{1};
constexpr std::chrono::seconds kDefaultPeriodicCollectionInterval{20};
constexpr std::chrono::seconds kDefaultPeriodicMonitorInterval{5};
// Custom collection defaults used when the matching dump flag is not supplied.
constexpr std::chrono::nanoseconds kCustomCollectionInterval = std::chrono::seconds(10);
constexpr std::chrono::nanoseconds kCustomCollectionDuration = std::chrono::minutes(30);
61
// Name of this service as it appears in log lines and dump output.
constexpr const char* kServiceName = "WatchdogPerfService";
// Separator line (100 dashes followed by a newline) placed around major dump sections.
static const std::string kDumpMajorDelimiter = std::string(100, '-') + '\n';  // NOLINT
// printf-style template for the dump help text. Placeholders, in order: service name, start
// flag, interval flag, default interval in seconds (%lld), max duration flag, default duration
// in minutes (%ld), filter packages flag, end flag.
constexpr const char* kHelpText =
        "\n%s dump options:\n"
        "%s: Starts custom performance data collection. Customize the collection behavior with "
        "the following optional arguments:\n"
        "\t%s <seconds>: Modifies the collection interval. Default behavior is to collect once "
        "every %lld seconds.\n"
        "\t%s <seconds>: Modifies the maximum collection duration. Default behavior is to collect "
        "until %ld minutes before automatically stopping the custom collection and discarding "
        "the collected data.\n"
        "\t%s <package name>,<package name>,...: Comma-separated value containing package names. "
        "When provided, the results are filtered only to the provided package names. Default "
        "behavior is to list the results for the top N packages.\n"
        "%s: Stops custom performance data collection and generates a dump of "
        "the collection report.\n\n"
        "When no options are specified, the carwatchdog report contains the performance data "
        "collected during boot-time and over the last few minutes before the report generation.\n";
80
parseSecondsFlag(const Vector<String16> & args,size_t pos)81 Result<std::chrono::seconds> parseSecondsFlag(const Vector<String16>& args, size_t pos) {
82 if (args.size() <= pos) {
83 return Error() << "Value not provided";
84 }
85 uint64_t value;
86 if (std::string strValue = std::string(String8(args[pos]).string());
87 !ParseUint(strValue, &value)) {
88 return Error() << "Invalid value " << strValue << ", must be an integer";
89 }
90 return std::chrono::seconds(value);
91 }
92
toString(std::variant<EventType,SwitchMessage> what)93 constexpr const char* toString(std::variant<EventType, SwitchMessage> what) {
94 return std::visit(
95 [&](const auto& v) -> const char* {
96 switch (static_cast<int>(v)) {
97 case EventType::INIT:
98 return "INIT";
99 case EventType::TERMINATED:
100 return "TERMINATED";
101 case EventType::BOOT_TIME_COLLECTION:
102 return "BOOT_TIME_COLLECTION";
103 case EventType::PERIODIC_COLLECTION:
104 return "PERIODIC_COLLECTION";
105 case EventType::CUSTOM_COLLECTION:
106 return "CUSTOM_COLLECTION";
107 case EventType::PERIODIC_MONITOR:
108 return "PERIODIC_MONITOR";
109 case EventType::LAST_EVENT:
110 return "LAST_EVENT";
111 case SwitchMessage::END_BOOTTIME_COLLECTION:
112 return "END_BOOTTIME_COLLECTION";
113 case SwitchMessage::END_CUSTOM_COLLECTION:
114 return "END_CUSTOM_COLLECTION";
115 default:
116 return "INVALID_EVENT_OR_SWITCH_MESSAGE";
117 }
118 },
119 what);
120 }
121
toString(SystemState systemState)122 constexpr const char* toString(SystemState systemState) {
123 switch (systemState) {
124 case SystemState::NORMAL_MODE:
125 return "NORMAL_MODE";
126 case SystemState::GARAGE_MODE:
127 return "GARAGE_MODE";
128 default:
129 return "UNKNOWN MODE";
130 }
131 }
132
133 } // namespace
134
toString() const135 std::string WatchdogPerfService::EventMetadata::toString() const {
136 std::string buffer;
137 const auto intervalInSecs = std::chrono::duration_cast<std::chrono::seconds>(interval).count();
138 StringAppendF(&buffer, "Event interval: %lld second%s\n", intervalInSecs,
139 ((intervalInSecs > 1) ? "s" : ""));
140 if (!filterPackages.empty()) {
141 std::vector<std::string> packages(filterPackages.begin(), filterPackages.end());
142 StringAppendF(&buffer, "Filtered results to packages: %s\n", Join(packages, ", ").c_str());
143 }
144 return buffer;
145 }
146
registerDataProcessor(sp<IDataProcessorInterface> processor)147 Result<void> WatchdogPerfService::registerDataProcessor(sp<IDataProcessorInterface> processor) {
148 if (processor == nullptr) {
149 return Error() << "Must provide a valid data processor";
150 }
151 if (const auto result = processor->init(); !result.ok()) {
152 return Error() << "Failed to initialize " << processor->name().c_str() << ": "
153 << result.error().message();
154 }
155 Mutex::Autolock lock(mMutex);
156 mDataProcessors.push_back(processor);
157 if (DEBUG) {
158 ALOGD("Successfully registered %s to %s", processor->name().c_str(), kServiceName);
159 }
160 return {};
161 }
162
start()163 Result<void> WatchdogPerfService::start() {
164 {
165 Mutex::Autolock lock(mMutex);
166 if (mCurrCollectionEvent != EventType::INIT || mCollectionThread.joinable()) {
167 return Error(INVALID_OPERATION) << "Cannot start " << kServiceName << " more than once";
168 }
169 std::chrono::nanoseconds boottimeCollectionInterval =
170 std::chrono::duration_cast<std::chrono::nanoseconds>(
171 std::chrono::seconds(sysprop::boottimeCollectionInterval().value_or(
172 kDefaultBoottimeCollectionInterval.count())));
173 std::chrono::nanoseconds periodicCollectionInterval =
174 std::chrono::duration_cast<std::chrono::nanoseconds>(
175 std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
176 kDefaultPeriodicCollectionInterval.count())));
177 std::chrono::nanoseconds periodicMonitorInterval =
178 std::chrono::duration_cast<std::chrono::nanoseconds>(
179 std::chrono::seconds(sysprop::periodicMonitorInterval().value_or(
180 kDefaultPeriodicMonitorInterval.count())));
181 mBoottimeCollection = {
182 .eventType = EventType::BOOT_TIME_COLLECTION,
183 .interval = boottimeCollectionInterval,
184 .lastUptime = 0,
185 };
186 mPeriodicCollection = {
187 .eventType = EventType::PERIODIC_COLLECTION,
188 .interval = periodicCollectionInterval,
189 .lastUptime = 0,
190 };
191 mPeriodicMonitor = {
192 .eventType = EventType::PERIODIC_MONITOR,
193 .interval = periodicMonitorInterval,
194 .lastUptime = 0,
195 };
196 if (mDataProcessors.empty()) {
197 ALOGE("Terminating %s: No data processor is registered", kServiceName);
198 mCurrCollectionEvent = EventType::TERMINATED;
199 return Error() << "No data processor is registered";
200 }
201 }
202
203 mCollectionThread = std::thread([&]() {
204 {
205 Mutex::Autolock lock(mMutex);
206 if (EventType expected = EventType::INIT; mCurrCollectionEvent != expected) {
207 ALOGE("Skipping performance data collection as the current collection event "
208 "%s != %s",
209 toString(mCurrCollectionEvent), toString(expected));
210 return;
211 }
212 mCurrCollectionEvent = EventType::BOOT_TIME_COLLECTION;
213 mBoottimeCollection.lastUptime = mHandlerLooper->now();
214 mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
215 mHandlerLooper->sendMessage(this, EventType::BOOT_TIME_COLLECTION);
216 }
217 if (set_sched_policy(0, SP_BACKGROUND) != 0) {
218 ALOGW("Failed to set background scheduling priority to %s thread", kServiceName);
219 }
220 if (int result = pthread_setname_np(pthread_self(), "WatchdogPerfSvc"); result != 0) {
221 ALOGE("Failed to set %s thread name: %d", kServiceName, result);
222 }
223 ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
224 bool isCollectionActive = true;
225 /*
226 * Loop until the collection is not active -- performance collection runs on this thread in
227 * a handler.
228 */
229 while (isCollectionActive) {
230 mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
231 Mutex::Autolock lock(mMutex);
232 isCollectionActive = mCurrCollectionEvent != EventType::TERMINATED;
233 }
234 });
235 return {};
236 }
237
terminate()238 void WatchdogPerfService::terminate() {
239 {
240 Mutex::Autolock lock(mMutex);
241 if (mCurrCollectionEvent == EventType::TERMINATED) {
242 ALOGE("%s was terminated already", kServiceName);
243 return;
244 }
245 ALOGE("Terminating %s as carwatchdog is terminating", kServiceName);
246 if (mCurrCollectionEvent != EventType::INIT) {
247 /*
248 * Looper runs only after EventType::TNIT has completed so remove looper messages
249 * and wake the looper only when the current collection has changed from INIT.
250 */
251 mHandlerLooper->removeMessages(this);
252 mHandlerLooper->wake();
253 }
254 for (const auto& processor : mDataProcessors) {
255 processor->terminate();
256 }
257 mCurrCollectionEvent = EventType::TERMINATED;
258 }
259 if (mCollectionThread.joinable()) {
260 mCollectionThread.join();
261 if (DEBUG) {
262 ALOGD("%s collection thread terminated", kServiceName);
263 }
264 }
265 }
266
setSystemState(SystemState systemState)267 void WatchdogPerfService::setSystemState(SystemState systemState) {
268 Mutex::Autolock lock(mMutex);
269 if (mSystemState != systemState) {
270 ALOGI("%s switching from %s to %s", kServiceName, toString(mSystemState),
271 toString(systemState));
272 }
273 mSystemState = systemState;
274 }
275
onBootFinished()276 Result<void> WatchdogPerfService::onBootFinished() {
277 Mutex::Autolock lock(mMutex);
278 if (EventType expected = EventType::BOOT_TIME_COLLECTION; mCurrCollectionEvent != expected) {
279 /*
280 * This case happens when either the WatchdogPerfService has prematurely terminated before
281 * boot complete notification is received or multiple boot complete notifications are
282 * received. In either case don't return error as this will lead to runtime exception and
283 * cause system to boot loop.
284 */
285 ALOGE("Current performance data collection event %s != %s", toString(mCurrCollectionEvent),
286 toString(expected));
287 return {};
288 }
289 mBoottimeCollection.lastUptime = mHandlerLooper->now();
290 mHandlerLooper->removeMessages(this);
291 mHandlerLooper->sendMessage(this, SwitchMessage::END_BOOTTIME_COLLECTION);
292 if (DEBUG) {
293 ALOGD("Boot-time event finished");
294 }
295 return {};
296 }
297
onCustomCollection(int fd,const Vector<String16> & args)298 Result<void> WatchdogPerfService::onCustomCollection(int fd, const Vector<String16>& args) {
299 if (args.empty()) {
300 return Error(BAD_VALUE) << "No custom collection dump arguments";
301 }
302
303 if (args[0] == String16(kStartCustomCollectionFlag)) {
304 if (args.size() > 7) {
305 return Error(BAD_VALUE) << "Number of arguments to start custom performance data "
306 << "collection cannot exceed 7";
307 }
308 std::chrono::nanoseconds interval = kCustomCollectionInterval;
309 std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
310 std::unordered_set<std::string> filterPackages;
311 for (size_t i = 1; i < args.size(); ++i) {
312 if (args[i] == String16(kIntervalFlag)) {
313 const auto& result = parseSecondsFlag(args, i + 1);
314 if (!result.ok()) {
315 return Error(BAD_VALUE)
316 << "Failed to parse " << kIntervalFlag << ": " << result.error();
317 }
318 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
319 ++i;
320 continue;
321 }
322 if (args[i] == String16(kMaxDurationFlag)) {
323 const auto& result = parseSecondsFlag(args, i + 1);
324 if (!result.ok()) {
325 return Error(BAD_VALUE)
326 << "Failed to parse " << kMaxDurationFlag << ": " << result.error();
327 }
328 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
329 ++i;
330 continue;
331 }
332 if (args[i] == String16(kFilterPackagesFlag)) {
333 if (args.size() < i + 1) {
334 return Error(BAD_VALUE)
335 << "Must provide value for '" << kFilterPackagesFlag << "' flag";
336 }
337 std::vector<std::string> packages =
338 Split(std::string(String8(args[i + 1]).string()), ",");
339 std::copy(packages.begin(), packages.end(),
340 std::inserter(filterPackages, filterPackages.end()));
341 ++i;
342 continue;
343 }
344 ALOGW("Unknown flag %s provided to start custom performance data collection",
345 String8(args[i]).string());
346 return Error(BAD_VALUE) << "Unknown flag " << String8(args[i]).string()
347 << " provided to start custom performance data collection";
348 }
349 if (const auto& result = startCustomCollection(interval, maxDuration, filterPackages);
350 !result.ok()) {
351 WriteStringToFd(result.error().message(), fd);
352 return result;
353 }
354 return {};
355 }
356
357 if (args[0] == String16(kEndCustomCollectionFlag)) {
358 if (args.size() != 1) {
359 ALOGW("Number of arguments to stop custom performance data collection cannot exceed 1. "
360 "Stopping the data collection.");
361 WriteStringToFd("Number of arguments to stop custom performance data collection "
362 "cannot exceed 1. Stopping the data collection.",
363 fd);
364 }
365 return endCustomCollection(fd);
366 }
367
368 return Error(BAD_VALUE) << "Custom perf collection dump arguments start neither with "
369 << kStartCustomCollectionFlag << " nor with "
370 << kEndCustomCollectionFlag << " flags";
371 }
372
onDump(int fd)373 Result<void> WatchdogPerfService::onDump(int fd) {
374 Mutex::Autolock lock(mMutex);
375 if (mCurrCollectionEvent == EventType::TERMINATED) {
376 ALOGW("%s not active. Dumping cached data", kServiceName);
377 if (!WriteStringToFd(StringPrintf("%s not active. Dumping cached data.", kServiceName),
378 fd)) {
379 return Error(FAILED_TRANSACTION) << "Failed to write " << kServiceName << " status";
380 }
381 }
382
383 if (const auto& result = dumpCollectorsStatusLocked(fd); !result.ok()) {
384 return Error(FAILED_TRANSACTION) << result.error();
385 }
386
387 if (!WriteStringToFd(StringPrintf("\n%s%s report:\n%sBoot-time collection information:\n%s\n",
388 kDumpMajorDelimiter.c_str(), kServiceName,
389 kDumpMajorDelimiter.c_str(), std::string(33, '=').c_str()),
390 fd) ||
391 !WriteStringToFd(mBoottimeCollection.toString(), fd) ||
392 !WriteStringToFd(StringPrintf("\nPeriodic collection information:\n%s\n",
393 std::string(32, '=').c_str()),
394 fd) ||
395 !WriteStringToFd(mPeriodicCollection.toString(), fd)) {
396 return Error(FAILED_TRANSACTION)
397 << "Failed to dump the boot-time and periodic collection reports.";
398 }
399
400 for (const auto& processor : mDataProcessors) {
401 if (const auto result = processor->onDump(fd); !result.ok()) {
402 return result;
403 }
404 }
405
406 WriteStringToFd(kDumpMajorDelimiter, fd);
407 return {};
408 }
409
dumpHelpText(int fd)410 bool WatchdogPerfService::dumpHelpText(int fd) {
411 return WriteStringToFd(StringPrintf(kHelpText, kServiceName, kStartCustomCollectionFlag,
412 kIntervalFlag,
413 std::chrono::duration_cast<std::chrono::seconds>(
414 kCustomCollectionInterval)
415 .count(),
416 kMaxDurationFlag,
417 std::chrono::duration_cast<std::chrono::minutes>(
418 kCustomCollectionDuration)
419 .count(),
420 kFilterPackagesFlag, kEndCustomCollectionFlag),
421 fd);
422 }
423
dumpCollectorsStatusLocked(int fd)424 Result<void> WatchdogPerfService::dumpCollectorsStatusLocked(int fd) {
425 if (!mUidIoStats->enabled() &&
426 !WriteStringToFd(StringPrintf("UidIoStats collector failed to access the file %s",
427 mUidIoStats->filePath().c_str()),
428 fd)) {
429 return Error() << "Failed to write UidIoStats collector status";
430 }
431 if (!mProcStat->enabled() &&
432 !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
433 mProcStat->filePath().c_str()),
434 fd)) {
435 return Error() << "Failed to write ProcStat collector status";
436 }
437 if (!mProcPidStat->enabled() &&
438 !WriteStringToFd(StringPrintf("ProcPidStat collector failed to access the directory %s",
439 mProcPidStat->dirPath().c_str()),
440 fd)) {
441 return Error() << "Failed to write ProcPidStat collector status";
442 }
443 return {};
444 }
445
startCustomCollection(std::chrono::nanoseconds interval,std::chrono::nanoseconds maxDuration,const std::unordered_set<std::string> & filterPackages)446 Result<void> WatchdogPerfService::startCustomCollection(
447 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
448 const std::unordered_set<std::string>& filterPackages) {
449 if (interval < kMinEventInterval || maxDuration < kMinEventInterval) {
450 return Error(INVALID_OPERATION)
451 << "Collection interval and maximum duration must be >= "
452 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinEventInterval).count()
453 << " milliseconds.";
454 }
455 Mutex::Autolock lock(mMutex);
456 if (EventType expected = EventType::PERIODIC_COLLECTION; mCurrCollectionEvent != expected) {
457 return Error(INVALID_OPERATION)
458 << "Cannot start a custom collection when the current collection event "
459 << toString(mCurrCollectionEvent) << " != " << toString(expected)
460 << " collection event";
461 }
462
463 mCustomCollection = {
464 .eventType = EventType::CUSTOM_COLLECTION,
465 .interval = interval,
466 .lastUptime = mHandlerLooper->now(),
467 .filterPackages = filterPackages,
468 };
469
470 mHandlerLooper->removeMessages(this);
471 nsecs_t uptime = mHandlerLooper->now() + maxDuration.count();
472 mHandlerLooper->sendMessageAtTime(uptime, this, SwitchMessage::END_CUSTOM_COLLECTION);
473 mCurrCollectionEvent = EventType::CUSTOM_COLLECTION;
474 mHandlerLooper->sendMessage(this, EventType::CUSTOM_COLLECTION);
475 ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
476 return {};
477 }
478
endCustomCollection(int fd)479 Result<void> WatchdogPerfService::endCustomCollection(int fd) {
480 Mutex::Autolock lock(mMutex);
481 if (mCurrCollectionEvent != EventType::CUSTOM_COLLECTION) {
482 return Error(INVALID_OPERATION) << "No custom collection is running";
483 }
484
485 mHandlerLooper->removeMessages(this);
486 mHandlerLooper->sendMessage(this, SwitchMessage::END_CUSTOM_COLLECTION);
487
488 if (const auto result = dumpCollectorsStatusLocked(fd); !result.ok()) {
489 return Error(FAILED_TRANSACTION) << result.error();
490 }
491
492 if (!WriteStringToFd(StringPrintf("%sPerformance data report for custom collection:\n%s",
493 kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
494 fd) ||
495 !WriteStringToFd(mCustomCollection.toString(), fd)) {
496 return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
497 }
498
499 for (const auto& processor : mDataProcessors) {
500 if (const auto result = processor->onCustomCollectionDump(fd); !result.ok()) {
501 return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
502 << " collection: " << result.error();
503 }
504 }
505
506 if (DEBUG) {
507 ALOGD("Custom event finished");
508 }
509 WriteStringToFd(kDumpMajorDelimiter, fd);
510 return {};
511 }
512
handleMessage(const Message & message)513 void WatchdogPerfService::handleMessage(const Message& message) {
514 Result<void> result;
515
516 auto switchToPeriodicLocked = [&](bool startNow) {
517 mHandlerLooper->removeMessages(this);
518 mCurrCollectionEvent = EventType::PERIODIC_COLLECTION;
519 mPeriodicCollection.lastUptime = mHandlerLooper->now();
520 if (startNow) {
521 mHandlerLooper->sendMessage(this, EventType::PERIODIC_COLLECTION);
522 } else {
523 mPeriodicCollection.lastUptime += mPeriodicCollection.interval.count();
524 mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastUptime, this,
525 EventType::PERIODIC_COLLECTION);
526 }
527 mPeriodicMonitor.lastUptime = mHandlerLooper->now() + mPeriodicMonitor.interval.count();
528 mHandlerLooper->sendMessageAtTime(mPeriodicMonitor.lastUptime, this,
529 EventType::PERIODIC_MONITOR);
530 ALOGI("Switching to %s and %s", toString(mCurrCollectionEvent),
531 toString(EventType::PERIODIC_MONITOR));
532 };
533
534 switch (message.what) {
535 case static_cast<int>(EventType::BOOT_TIME_COLLECTION):
536 result = processCollectionEvent(&mBoottimeCollection);
537 break;
538 case static_cast<int>(SwitchMessage::END_BOOTTIME_COLLECTION):
539 if (result = processCollectionEvent(&mBoottimeCollection); result.ok()) {
540 Mutex::Autolock lock(mMutex);
541 switchToPeriodicLocked(/*startNow=*/false);
542 }
543 break;
544 case static_cast<int>(EventType::PERIODIC_COLLECTION):
545 result = processCollectionEvent(&mPeriodicCollection);
546 break;
547 case static_cast<int>(EventType::CUSTOM_COLLECTION):
548 result = processCollectionEvent(&mCustomCollection);
549 break;
550 case static_cast<int>(EventType::PERIODIC_MONITOR):
551 result = processMonitorEvent(&mPeriodicMonitor);
552 break;
553 case static_cast<int>(SwitchMessage::END_CUSTOM_COLLECTION): {
554 Mutex::Autolock lock(mMutex);
555 if (EventType expected = EventType::CUSTOM_COLLECTION;
556 mCurrCollectionEvent != expected) {
557 ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
558 toString(mCurrCollectionEvent), toString(expected));
559 return;
560 }
561 mCustomCollection = {};
562 for (const auto& processor : mDataProcessors) {
563 /*
564 * Clear custom collection cache on the data processors when the custom collection
565 * ends.
566 */
567 processor->onCustomCollectionDump(-1);
568 }
569 switchToPeriodicLocked(/*startNow=*/true);
570 return;
571 }
572 default:
573 result = Error() << "Unknown message: " << message.what;
574 }
575
576 if (!result.ok()) {
577 Mutex::Autolock lock(mMutex);
578 ALOGE("Terminating %s: %s", kServiceName, result.error().message().c_str());
579 /*
580 * DO NOT CALL terminate() as it tries to join the collection thread but this code is
581 * executed on the collection thread. Thus it will result in a deadlock.
582 */
583 mCurrCollectionEvent = EventType::TERMINATED;
584 mHandlerLooper->removeMessages(this);
585 mHandlerLooper->wake();
586 }
587 }
588
processCollectionEvent(WatchdogPerfService::EventMetadata * metadata)589 Result<void> WatchdogPerfService::processCollectionEvent(
590 WatchdogPerfService::EventMetadata* metadata) {
591 Mutex::Autolock lock(mMutex);
592 /*
593 * Messages sent to the looper are intrinsically racy such that a message from the previous
594 * collection event may land in the looper after the current collection has already begun. Thus
595 * verify the current collection event before starting the collection.
596 */
597 if (mCurrCollectionEvent != metadata->eventType) {
598 ALOGW("Skipping %s event on collection event %s", toString(metadata->eventType),
599 toString(mCurrCollectionEvent));
600 return {};
601 }
602 if (DEBUG) {
603 ALOGD("Processing %s collection event", toString(metadata->eventType));
604 }
605 if (metadata->interval < kMinEventInterval) {
606 return Error()
607 << "Collection interval of "
608 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
609 << " seconds for " << toString(metadata->eventType)
610 << " collection cannot be less than "
611 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
612 << " seconds";
613 }
614 if (const auto result = collectLocked(metadata); !result.ok()) {
615 return Error() << toString(metadata->eventType) << " collection failed: " << result.error();
616 }
617 metadata->lastUptime += metadata->interval.count();
618 mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
619 return {};
620 }
621
collectLocked(WatchdogPerfService::EventMetadata * metadata)622 Result<void> WatchdogPerfService::collectLocked(WatchdogPerfService::EventMetadata* metadata) {
623 if (!mUidIoStats->enabled() && !mProcStat->enabled() && !mProcPidStat->enabled()) {
624 return Error() << "No collectors enabled";
625 }
626
627 time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
628
629 if (mUidIoStats->enabled()) {
630 if (const auto result = mUidIoStats->collect(); !result.ok()) {
631 return Error() << "Failed to collect per-uid I/O usage: " << result.error();
632 }
633 }
634
635 if (mProcStat->enabled()) {
636 if (const auto result = mProcStat->collect(); !result.ok()) {
637 return Error() << "Failed to collect proc stats: " << result.error();
638 }
639 }
640
641 if (mProcPidStat->enabled()) {
642 if (const auto result = mProcPidStat->collect(); !result.ok()) {
643 return Error() << "Failed to collect process stats: " << result.error();
644 }
645 }
646
647 for (const auto& processor : mDataProcessors) {
648 Result<void> result;
649 switch (mCurrCollectionEvent) {
650 case EventType::BOOT_TIME_COLLECTION:
651 result = processor->onBoottimeCollection(now, mUidIoStats, mProcStat, mProcPidStat);
652 break;
653 case EventType::PERIODIC_COLLECTION:
654 result = processor->onPeriodicCollection(now, mSystemState, mUidIoStats, mProcStat,
655 mProcPidStat);
656 break;
657 case EventType::CUSTOM_COLLECTION:
658 result = processor->onCustomCollection(now, mSystemState, metadata->filterPackages,
659 mUidIoStats, mProcStat, mProcPidStat);
660 break;
661 default:
662 result = Error() << "Invalid collection event " << toString(mCurrCollectionEvent);
663 }
664 if (!result.ok()) {
665 return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
666 << " collection: " << result.error();
667 }
668 }
669
670 return {};
671 }
672
processMonitorEvent(WatchdogPerfService::EventMetadata * metadata)673 Result<void> WatchdogPerfService::processMonitorEvent(
674 WatchdogPerfService::EventMetadata* metadata) {
675 if (metadata->eventType != static_cast<int>(EventType::PERIODIC_MONITOR)) {
676 return Error() << "Invalid monitor event " << toString(metadata->eventType);
677 }
678 if (DEBUG) {
679 ALOGD("Processing %s monitor event", toString(metadata->eventType));
680 }
681 if (metadata->interval < kMinEventInterval) {
682 return Error()
683 << "Monitor interval of "
684 << std::chrono::duration_cast<std::chrono::seconds>(metadata->interval).count()
685 << " seconds for " << toString(metadata->eventType) << " event cannot be less than "
686 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
687 << " seconds";
688 }
689 Mutex::Autolock lock(mMutex);
690 if (!mProcDiskStats->enabled()) {
691 return Error() << "Cannot access proc disk stats for monitoring";
692 }
693 time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
694 if (const auto result = mProcDiskStats->collect(); !result.ok()) {
695 return Error() << "Failed to collect disk stats: " << result.error();
696 }
697 auto* currCollectionMetadata = currCollectionMetadataLocked();
698 if (currCollectionMetadata == nullptr) {
699 return Error() << "No metadata available for current collection event: "
700 << toString(mCurrCollectionEvent);
701 }
702 bool requestedCollection = false;
703 const auto requestCollection = [&]() mutable {
704 if (requestedCollection) {
705 return;
706 }
707 const nsecs_t prevUptime =
708 currCollectionMetadata->lastUptime - currCollectionMetadata->interval.count();
709 nsecs_t uptime = mHandlerLooper->now();
710 if (const auto delta = std::abs(uptime - prevUptime); delta < kMinEventInterval.count()) {
711 return;
712 }
713 currCollectionMetadata->lastUptime = uptime;
714 mHandlerLooper->removeMessages(this, currCollectionMetadata->eventType);
715 mHandlerLooper->sendMessage(this, currCollectionMetadata->eventType);
716 requestedCollection = true;
717 };
718 for (const auto& processor : mDataProcessors) {
719 if (const auto result =
720 processor->onPeriodicMonitor(now, mProcDiskStats, requestCollection);
721 !result.ok()) {
722 return Error() << processor->name() << " failed on " << toString(metadata->eventType)
723 << ": " << result.error();
724 }
725 }
726 metadata->lastUptime += metadata->interval.count();
727 if (metadata->lastUptime == currCollectionMetadata->lastUptime) {
728 /*
729 * If the |PERIODIC_MONITOR| and *_COLLECTION events overlap, skip the |PERIODIC_MONITOR|
730 * event.
731 */
732 metadata->lastUptime += metadata->interval.count();
733 }
734 mHandlerLooper->sendMessageAtTime(metadata->lastUptime, this, metadata->eventType);
735 return {};
736 }
737
currCollectionMetadataLocked()738 WatchdogPerfService::EventMetadata* WatchdogPerfService::currCollectionMetadataLocked() {
739 switch (mCurrCollectionEvent) {
740 case EventType::BOOT_TIME_COLLECTION:
741 return &mBoottimeCollection;
742 case EventType::PERIODIC_COLLECTION:
743 return &mPeriodicCollection;
744 case EventType::CUSTOM_COLLECTION:
745 return &mCustomCollection;
746 default:
747 return nullptr;
748 }
749 }
750
751 } // namespace watchdog
752 } // namespace automotive
753 } // namespace android
754