1 /**
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "WatchdogProcessService.h"
21
22 #include "UidProcStatsCollector.h"
23 #include "WatchdogServiceHelper.h"
24
25 #include <aidl/android/hardware/automotive/vehicle/BnVehicle.h>
26 #include <aidl/android/hardware/automotive/vehicle/ProcessTerminationReason.h>
27 #include <android-base/file.h>
28 #include <android-base/macros.h>
29 #include <android-base/properties.h>
30 #include <android-base/stringprintf.h>
31 #include <android-base/strings.h>
32 #include <android/automotive/watchdog/BnCarWatchdogClient.h>
33 #include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
34 #include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
35 #include <android/hidl/manager/1.0/IServiceManager.h>
36 #include <binder/IPCThreadState.h>
37 #include <binder/IServiceManager.h>
38 #include <hidl/HidlTransportSupport.h>
39 #include <utils/SystemClock.h>
40
41 #include <IVhalClient.h>
42 #include <VehicleHalTypes.h>
43
44 #include <utility>
45
46 namespace android {
47 namespace automotive {
48 namespace watchdog {
49
50 namespace aawi = ::android::automotive::watchdog::internal;
51
52 using aawi::BnCarWatchdogServiceForSystem;
53 using aawi::ICarWatchdogServiceForSystem;
54 using aawi::ProcessIdentifier;
55 using ::aidl::android::hardware::automotive::vehicle::BnVehicle;
56 using ::aidl::android::hardware::automotive::vehicle::ProcessTerminationReason;
57 using ::aidl::android::hardware::automotive::vehicle::StatusCode;
58 using ::aidl::android::hardware::automotive::vehicle::SubscribeOptions;
59 using ::aidl::android::hardware::automotive::vehicle::VehiclePropConfig;
60 using ::aidl::android::hardware::automotive::vehicle::VehicleProperty;
61 using ::aidl::android::hardware::automotive::vehicle::VehiclePropertyStatus;
62 using ::aidl::android::hardware::automotive::vehicle::VehiclePropValue;
63 using ::android::IBinder;
64 using ::android::sp;
65 using ::android::String16;
66 using ::android::base::Error;
67 using ::android::base::GetIntProperty;
68 using ::android::base::GetProperty;
69 using ::android::base::ReadFileToString;
70 using ::android::base::Result;
71 using ::android::base::StringAppendF;
72 using ::android::base::StringPrintf;
73 using ::android::base::Trim;
74 using ::android::base::WriteStringToFd;
75 using ::android::binder::Status;
76 using ::android::frameworks::automotive::vhal::HalPropError;
77 using ::android::frameworks::automotive::vhal::IHalPropValue;
78 using ::android::frameworks::automotive::vhal::IVhalClient;
79 using ::android::hardware::hidl_vec;
80 using ::android::hardware::interfacesEqual;
81 using ::android::hardware::Return;
82 using ::android::hidl::base::V1_0::IBase;
83
84 namespace {
85
86 const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
87 TimeoutLength::TIMEOUT_MODERATE,
88 TimeoutLength::TIMEOUT_NORMAL};
89
90 // TimeoutLength is also used as a message ID. Other message IDs should start next to
91 // TimeoutLength::TIMEOUT_NORMAL.
92 const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
93 const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
94 const int32_t MSG_CACHE_VHAL_PROCESS_IDENTIFIER = MSG_VHAL_HEALTH_CHECK + 1;
95
96 // VHAL is supposed to send heart beat every 3s. Car watchdog checks if there is the latest heart
97 // beat from VHAL within 3s, allowing 1s marginal time.
98 // If {@code ro.carwatchdog.vhal_healthcheck.interval} is set, car watchdog checks VHAL health at
99 // the given interval. The lower bound of the interval is 3s.
100 constexpr int32_t kDefaultVhalCheckIntervalSec = 3;
101 constexpr std::chrono::milliseconds kHealthCheckDelayMs = 1s;
102
103 constexpr int32_t kMissingIntPropertyValue = -1;
104
105 constexpr const char kPropertyVhalCheckInterval[] = "ro.carwatchdog.vhal_healthcheck.interval";
106 constexpr const char kPropertyClientCheckInterval[] = "ro.carwatchdog.client_healthcheck.interval";
107 constexpr const char kServiceName[] = "WatchdogProcessService";
108 constexpr const char kHidlVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
109 constexpr const char kAidlVhalInterfaceName[] =
110 "android.hardware.automotive.vehicle.IVehicle/default";
111
toPidString(const std::vector<ProcessIdentifier> & processIdentifiers)112 std::string toPidString(const std::vector<ProcessIdentifier>& processIdentifiers) {
113 size_t size = processIdentifiers.size();
114 if (size == 0) {
115 return "";
116 }
117 std::string buffer;
118 StringAppendF(&buffer, "%d", processIdentifiers[0].pid);
119 for (size_t i = 1; i < size; i++) {
120 StringAppendF(&buffer, ", %d", processIdentifiers[i].pid);
121 }
122 return buffer;
123 }
124
isSystemShuttingDown()125 bool isSystemShuttingDown() {
126 std::string sysPowerCtl;
127 std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
128 std::getline(tokenStream, sysPowerCtl, ',');
129 return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
130 }
131
getStartTimeForPid(pid_t pid)132 int64_t getStartTimeForPid(pid_t pid) {
133 auto pidStat = UidProcStatsCollector::readStatFileForPid(pid);
134 if (!pidStat.ok()) {
135 return elapsedRealtime();
136 }
137 return pidStat->startTimeMillis;
138 }
139
queryHidlServiceManagerForVhalPid()140 Result<pid_t> queryHidlServiceManagerForVhalPid() {
141 using android::hidl::manager::V1_0::IServiceManager;
142 pid_t pid = -1;
143 Return<void> ret = IServiceManager::getService()->debugDump([&](auto& hals) {
144 for (const auto& info : hals) {
145 if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
146 continue;
147 }
148 if (info.interfaceName == kHidlVhalInterfaceName) {
149 pid = info.pid;
150 return;
151 }
152 }
153 });
154
155 if (!ret.isOk()) {
156 return Error() << "Failed to get VHAL process id from HIDL service manager";
157 }
158 if (pid == -1) {
159 return Error() << "No VHAL service registered to HIDL service manager";
160 }
161 return pid;
162 }
163
queryAidlServiceManagerForVhalPid()164 Result<pid_t> queryAidlServiceManagerForVhalPid() {
165 using ServiceDebugInfo = android::IServiceManager::ServiceDebugInfo;
166 std::vector<ServiceDebugInfo> serviceDebugInfos =
167 defaultServiceManager()->getServiceDebugInfo();
168 for (const auto& serviceDebugInfo : serviceDebugInfos) {
169 if (serviceDebugInfo.name == kAidlVhalInterfaceName) {
170 return serviceDebugInfo.pid;
171 }
172 }
173 return Error() << "No VHAL service registered to AIDL service manager";
174 }
175
176 } // namespace
177
WatchdogProcessService(const sp<Looper> & handlerLooper)178 WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
179 mHandlerLooper(handlerLooper),
180 mLastSessionId(0),
181 mServiceStarted(false),
182 mIsEnabled(true),
183 mVhalService(nullptr) {
184 mOnBinderDiedCallback =
185 std::make_shared<IVhalClient::OnBinderDiedCallbackFunc>([this] { handleVhalDeath(); });
186 for (const auto& timeout : kTimeouts) {
187 mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
188 mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
189 }
190
191 int32_t vhalHealthCheckIntervalSec =
192 GetIntProperty(kPropertyVhalCheckInterval, kDefaultVhalCheckIntervalSec);
193 vhalHealthCheckIntervalSec = std::max(vhalHealthCheckIntervalSec, kDefaultVhalCheckIntervalSec);
194 mVhalHealthCheckWindowMs = std::chrono::seconds(vhalHealthCheckIntervalSec);
195
196 int32_t clientHealthCheckIntervalSec =
197 GetIntProperty(kPropertyClientCheckInterval, kMissingIntPropertyValue);
198 // Overridden timeout value must be greater than or equal to the maximum possible timeout value.
199 // Otherwise, clients will be pinged more frequently than the guaranteed timeout duration.
200 if (clientHealthCheckIntervalSec != kMissingIntPropertyValue) {
201 int32_t normalSec = std::chrono::duration_cast<std::chrono::seconds>(
202 getTimeoutDurationNs(TimeoutLength::TIMEOUT_NORMAL))
203 .count();
204 mOverriddenClientHealthCheckWindowNs = std::optional<std::chrono::seconds>{
205 std::max(clientHealthCheckIntervalSec, normalSec)};
206 }
207
208 mGetStartTimeForPidFunc = &getStartTimeForPid;
209 }
210
registerWatchdogServiceHelper(const sp<WatchdogServiceHelperInterface> & helper)211 Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
212 const sp<WatchdogServiceHelperInterface>& helper) {
213 if (helper == nullptr) {
214 return Error() << "Must provide a non-null watchdog service helper instance";
215 }
216 Mutex::Autolock lock(mMutex);
217 mWatchdogServiceHelper = helper;
218 return {};
219 }
220
registerClient(const sp<ICarWatchdogClient> & client,TimeoutLength timeout)221 Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
222 TimeoutLength timeout) {
223 if (client == nullptr) {
224 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
225 "Must provide non-null client");
226 }
227 pid_t callingPid = IPCThreadState::self()->getCallingPid();
228 uid_t callingUid = IPCThreadState::self()->getCallingUid();
229
230 ClientInfo clientInfo(client, callingPid, callingUid, mGetStartTimeForPidFunc(callingPid));
231 return registerClient(clientInfo, timeout);
232 }
233
unregisterClient(const sp<ICarWatchdogClient> & client)234 Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
235 if (client == nullptr) {
236 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
237 "Must provide non-null client");
238 }
239 Mutex::Autolock lock(mMutex);
240 sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
241 // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
242 // MODERATE, NORMAL).
243 return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
244 }
245
registerCarWatchdogService(const sp<IBinder> & binder)246 Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
247 pid_t callingPid = IPCThreadState::self()->getCallingPid();
248 uid_t callingUid = IPCThreadState::self()->getCallingUid();
249
250 sp<WatchdogServiceHelperInterface> helper;
251 {
252 Mutex::Autolock lock(mMutex);
253 if (mWatchdogServiceHelper == nullptr) {
254 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
255 "Watchdog service helper instance is null");
256 }
257 helper = mWatchdogServiceHelper;
258 }
259
260 ClientInfo clientInfo(helper, binder, callingPid, callingUid,
261 mGetStartTimeForPidFunc(callingPid));
262 return registerClient(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
263 }
264
unregisterCarWatchdogService(const sp<IBinder> & binder)265 void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
266 Mutex::Autolock lock(mMutex);
267
268 std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
269 unregisterClientLocked(timeouts, binder, ClientType::Service);
270 }
271
registerMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)272 Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
273 if (monitor == nullptr) {
274 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
275 "Must provide non-null monitor");
276 }
277 sp<BinderDeathRecipient> binderDeathRecipient;
278 sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
279 {
280 Mutex::Autolock lock(mMutex);
281 if (mBinderDeathRecipient == nullptr) {
282 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
283 "Service is not initialized");
284 }
285 if (mMonitor != nullptr) {
286 if (binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
287 return Status::ok();
288 }
289 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)->unlinkToDeath(mBinderDeathRecipient);
290 }
291 mMonitor = monitor;
292 binderDeathRecipient = mBinderDeathRecipient;
293 }
294 if (status_t ret = binder->linkToDeath(binderDeathRecipient); ret != OK) {
295 {
296 Mutex::Autolock lock(mMutex);
297 if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
298 mMonitor.clear();
299 }
300 }
301 ALOGW("Failed to register the monitor as it is dead.");
302 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
303 }
304 if (DEBUG) {
305 ALOGD("Car watchdog monitor is registered");
306 }
307 return Status::ok();
308 }
309
unregisterMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)310 Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
311 if (monitor == nullptr) {
312 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
313 "Must provide non-null monitor");
314 }
315 Mutex::Autolock lock(mMutex);
316 if (mBinderDeathRecipient == nullptr) {
317 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "Service is not initialized");
318 }
319 sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
320 sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
321 if (curBinder != newBinder) {
322 ALOGW("Failed to unregister the monitor as it has not been registered.");
323 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
324 "The monitor has not been registered.");
325 }
326 curBinder->unlinkToDeath(mBinderDeathRecipient);
327 mMonitor.clear();
328 if (DEBUG) {
329 ALOGD("Car watchdog monitor is unregistered");
330 }
331 return Status::ok();
332 }
333
tellClientAlive(const sp<ICarWatchdogClient> & client,int32_t sessionId)334 Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
335 int32_t sessionId) {
336 Mutex::Autolock lock(mMutex);
337 return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
338 }
339
tellCarWatchdogServiceAlive(const sp<ICarWatchdogServiceForSystem> & service,const std::vector<ProcessIdentifier> & clientsNotResponding,int32_t sessionId)340 Status WatchdogProcessService::tellCarWatchdogServiceAlive(
341 const sp<ICarWatchdogServiceForSystem>& service,
342 const std::vector<ProcessIdentifier>& clientsNotResponding, int32_t sessionId) {
343 Status status;
344 {
345 Mutex::Autolock lock(mMutex);
346 if (DEBUG) {
347 if (clientsNotResponding.size() > 0) {
348 ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
349 sessionId, toPidString(clientsNotResponding).c_str());
350 }
351 }
352 status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
353 }
354 if (status.isOk()) {
355 dumpAndKillAllProcesses(clientsNotResponding, /*reportToVhal=*/true);
356 }
357 return status;
358 }
359
tellDumpFinished(const sp<aawi::ICarWatchdogMonitor> & monitor,const ProcessIdentifier & processIdentifier)360 Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
361 const ProcessIdentifier& processIdentifier) {
362 Mutex::Autolock lock(mMutex);
363 if (mMonitor == nullptr || monitor == nullptr ||
364 aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
365 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
366 return Status::
367 fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
368 "The monitor is not registered or an invalid monitor is given");
369 }
370 ALOGI("Process(pid: %d) has been dumped and killed", processIdentifier.pid);
371 return Status::ok();
372 }
373
setEnabled(bool isEnabled)374 void WatchdogProcessService::setEnabled(bool isEnabled) {
375 Mutex::Autolock lock(mMutex);
376 if (mIsEnabled == isEnabled) {
377 return;
378 }
379 ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
380 mIsEnabled = isEnabled;
381 mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
382 if (!mIsEnabled) {
383 return;
384 }
385 if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) == 0) {
386 mVhalHeartBeat.eventTime = uptimeMillis();
387 std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
388 mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
389 Message(MSG_VHAL_HEALTH_CHECK));
390 }
391 for (const auto& timeout : kTimeouts) {
392 mHandlerLooper->removeMessages(mMessageHandler, static_cast<int>(timeout));
393 startHealthCheckingLocked(timeout);
394 }
395 }
396
onUserStateChange(userid_t userId,bool isStarted)397 void WatchdogProcessService::onUserStateChange(userid_t userId, bool isStarted) {
398 std::string buffer;
399 Mutex::Autolock lock(mMutex);
400 if (isStarted) {
401 mStoppedUserIds.erase(userId);
402 } else {
403 mStoppedUserIds.insert(userId);
404 }
405 }
406
dump(int fd,const Vector<String16> &)407 Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
408 Mutex::Autolock lock(mMutex);
409 const char* indent = " ";
410 const char* doubleIndent = " ";
411 std::string buffer;
412 WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
413 WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
414 mIsEnabled ? "true" : "false"),
415 fd);
416 WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
417 int count = 1;
418 for (const auto& timeout : kTimeouts) {
419 std::vector<ClientInfo>& clients = mClients[timeout];
420 for (auto it = clients.begin(); it != clients.end(); it++, count++) {
421 WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
422 it->toString().c_str()),
423 fd);
424 }
425 }
426 WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
427 mMonitor == nullptr ? "false" : "true"),
428 fd);
429 WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
430 isSystemShuttingDown() ? "true" : "false"),
431 fd);
432 buffer = "none";
433 bool first = true;
434 for (const auto& userId : mStoppedUserIds) {
435 if (first) {
436 buffer = StringPrintf("%d", userId);
437 first = false;
438 } else {
439 StringAppendF(&buffer, ", %d", userId);
440 }
441 }
442 WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
443 WriteStringToFd(StringPrintf("%sVHAL health check interval: %lldms\n", indent,
444 mVhalHealthCheckWindowMs.count()),
445 fd);
446 if (mVhalProcessIdentifier.has_value()) {
447 WriteStringToFd(StringPrintf("%sVHAL process identifier (PID = %d, Start time millis = "
448 "%" PRIi64 ")",
449 indent, mVhalProcessIdentifier->pid,
450 mVhalProcessIdentifier->startTimeMillis),
451 fd);
452 }
453 return {};
454 }
455
// Runs one health check round for the timeout encoded in the message ID `what`.
//
// Clients that are still in the pinged map from the previous round are dumped and killed first.
// Then every registered client of this timeout is pinged with a fresh session ID, and the next
// round is scheduled if there was at least one client to check. Does nothing while the service
// is disabled.
void WatchdogProcessService::doHealthCheck(int what) {
    mHandlerLooper->removeMessages(mMessageHandler, what);
    {
        Mutex::Autolock lock(mMutex);
        if (!mIsEnabled) {
            return;
        }
    }
    const auto timeout = static_cast<TimeoutLength>(what);
    dumpAndKillClientsIfNotResponding(timeout);

    /* Works on a local snapshot of the client list.
     * Because of the snapshot, a client may still receive a ping message after it unregistered.
     * Clients should be able to handle that.
     */
    std::vector<ClientInfo> clientsToCheck;
    PingedClientMap& pingedClients = mPingedClients[timeout];
    {
        Mutex::Autolock lock(mMutex);
        pingedClients.clear();
        clientsToCheck = mClients[timeout];
        for (auto& clientInfo : clientsToCheck) {
            // Clients of stopped users get no new session and are not tracked as pinged.
            if (mStoppedUserIds.count(clientInfo.userId) == 0) {
                const int sessionId = getNewSessionId();
                clientInfo.sessionId = sessionId;
                pingedClients.insert(std::make_pair(sessionId, clientInfo));
            }
        }
    }

    for (const auto& clientInfo : clientsToCheck) {
        const Status pingStatus = clientInfo.checkIfAlive(timeout);
        if (pingStatus.isOk()) {
            continue;
        }
        ALOGW("Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid,
              pingStatus.exceptionMessage().c_str());
        // A client that could not be pinged is not expected to answer.
        Mutex::Autolock lock(mMutex);
        pingedClients.erase(clientInfo.sessionId);
    }
    // Though the size of pingedClients is a more specific measure, clientsToCheck is used as a
    // conservative approach.
    if (!clientsToCheck.empty()) {
        const auto durationNs = getTimeoutDurationNs(timeout);
        mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
    }
}
502
start()503 Result<void> WatchdogProcessService::start() {
504 {
505 Mutex::Autolock lock(mMutex);
506 if (mServiceStarted) {
507 return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
508 }
509 auto thiz = sp<WatchdogProcessService>::fromExisting(this);
510 mMessageHandler = sp<MessageHandlerImpl>::make(thiz);
511 mBinderDeathRecipient = sp<BinderDeathRecipient>::make(thiz);
512 mPropertyChangeListener = std::make_shared<PropertyChangeListener>(thiz);
513 mServiceStarted = true;
514 }
515 reportWatchdogAliveToVhal();
516 return {};
517 }
518
// Stops the service: unlinks and drops all registered clients, releases the watchdog service
// helper and the death recipient, cancels the pending VHAL health check, and disconnects from
// VHAL when connected. Does nothing when the service has not been started.
void WatchdogProcessService::terminate() {
    Mutex::Autolock lock(mMutex);
    if (!mServiceStarted) {
        return;
    }
    for (const auto& timeout : kTimeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        // Unlink every client first and drop the whole list at once; erasing from the front one
        // element at a time would shift the remaining elements on every step.
        for (const auto& client : clients) {
            client.unlinkToDeath(mBinderDeathRecipient);
        }
        clients.clear();
    }
    mWatchdogServiceHelper.clear();
    if (mMonitor != nullptr) {
        sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
        binder->unlinkToDeath(mBinderDeathRecipient);
    }
    mBinderDeathRecipient.clear();
    mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
    mServiceStarted = false;
    if (mVhalService == nullptr) {
        return;
    }
    // The heartbeat subscription is only dropped when VHAL_HEARTBEAT is not marked unsupported.
    if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) == 0) {
        std::vector<int32_t> propIds = {static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)};
        auto result =
                mVhalService->getSubscriptionClient(mPropertyChangeListener)->unsubscribe(propIds);
        if (!result.ok()) {
            ALOGW("Failed to unsubscribe from VHAL_HEARTBEAT.");
        }
    }
    mVhalService->removeOnBinderDiedCallback(mOnBinderDiedCallback);
    mVhalService.reset();
}
553
registerClient(const ClientInfo & clientInfo,TimeoutLength timeout)554 Status WatchdogProcessService::registerClient(const ClientInfo& clientInfo, TimeoutLength timeout) {
555 sp<BinderDeathRecipient> binderDeathRecipient;
556 {
557 Mutex::Autolock lock(mMutex);
558 if (mBinderDeathRecipient == nullptr) {
559 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
560 "Service is not initialized");
561 }
562 if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
563 ALOGW("Failed to register (%s) as it is already registered.",
564 clientInfo.toString().c_str());
565 return Status::ok();
566 }
567 std::vector<ClientInfo>& clients = mClients[timeout];
568 clients.emplace_back(clientInfo);
569 binderDeathRecipient = mBinderDeathRecipient;
570 }
571 if (status_t status = clientInfo.linkToDeath(binderDeathRecipient); status != OK) {
572 Mutex::Autolock lock(mMutex);
573 std::vector<TimeoutLength> timeouts = {timeout};
574 findClientAndProcessLocked(timeouts, clientInfo,
575 [&](std::vector<ClientInfo>& clients,
576 std::vector<ClientInfo>::const_iterator it) {
577 clients.erase(it);
578 });
579 ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
580 std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
581 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
582 }
583 if (DEBUG) {
584 ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
585 timeout);
586 }
587 Mutex::Autolock lock(mMutex);
588 // If the client array becomes non-empty, start health checking.
589 if (mClients[timeout].size() == 1) {
590 startHealthCheckingLocked(timeout);
591 ALOGI("Starting health checking for timeout = %d", timeout);
592 }
593 return Status::ok();
594 }
595
unregisterClientLocked(const std::vector<TimeoutLength> & timeouts,sp<IBinder> binder,ClientType clientType)596 Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
597 sp<IBinder> binder, ClientType clientType) {
598 const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
599 bool result = findClientAndProcessLocked(timeouts, binder,
600 [&](std::vector<ClientInfo>& clients,
601 std::vector<ClientInfo>::const_iterator it) {
602 it->unlinkToDeath(mBinderDeathRecipient);
603 clients.erase(it);
604 });
605 if (!result) {
606 std::string errorStr = StringPrintf("The %s has not been registered", clientName);
607 const char* errorCause = errorStr.c_str();
608 ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
609 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
610 }
611 if (DEBUG) {
612 ALOGD("Car watchdog %s is unregistered", clientName);
613 }
614 return Status::ok();
615 }
616
tellClientAliveLocked(const sp<IBinder> & binder,int32_t sessionId)617 Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
618 for (const auto& timeout : kTimeouts) {
619 PingedClientMap& clients = mPingedClients[timeout];
620 PingedClientMap::const_iterator it = clients.find(sessionId);
621 if (it == clients.cend() || !it->second.matchesBinder(binder)) {
622 continue;
623 }
624 clients.erase(it);
625 return Status::ok();
626 }
627 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
628 "The client is not registered or the session ID is not found");
629 }
630
// Searches the client lists of the given timeouts for the first client equal to clientInfo.
// When found, the optional processor is invoked with the owning list and the position of the
// client. Returns whether a client was found.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const ClientInfo& clientInfo,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        auto position = clients.begin();
        while (position != clients.end() && std::as_const(*position) != clientInfo) {
            position++;
        }
        if (position == clients.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(clients, position);
        }
        return true;
    }
    return false;
}
648
// Searches the client lists of the given timeouts for the first client matching the binder.
// When found, the optional processor is invoked with the owning list and the position of the
// client. Returns whether a client was found.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const sp<IBinder> binder,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        auto position = clients.begin();
        while (position != clients.end() && !position->matchesBinder(binder)) {
            position++;
        }
        if (position == clients.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(clients, position);
        }
        return true;
    }
    return false;
}
666
startHealthCheckingLocked(TimeoutLength timeout)667 Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
668 PingedClientMap& clients = mPingedClients[timeout];
669 clients.clear();
670 int what = static_cast<int>(timeout);
671 auto durationNs = getTimeoutDurationNs(timeout);
672 mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
673 return {};
674 }
675
dumpAndKillClientsIfNotResponding(TimeoutLength timeout)676 Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
677 std::vector<ProcessIdentifier> processIdentifiers;
678 std::vector<const ClientInfo*> clientsToNotify;
679 {
680 Mutex::Autolock lock(mMutex);
681 PingedClientMap& clients = mPingedClients[timeout];
682 for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
683 pid_t pid = -1;
684 userid_t userId = -1;
685 uint64_t startTimeMillis = 0;
686 std::vector<TimeoutLength> timeouts = {timeout};
687 findClientAndProcessLocked(timeouts, it->second,
688 [&](std::vector<ClientInfo>& cachedClients,
689 std::vector<ClientInfo>::const_iterator
690 cachedClientsIt) {
691 pid = cachedClientsIt->pid;
692 startTimeMillis = cachedClientsIt->startTimeMillis;
693 userId = cachedClientsIt->userId;
694 cachedClients.erase(cachedClientsIt);
695 });
696 if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
697 clientsToNotify.emplace_back(&it->second);
698 ProcessIdentifier processIdentifier;
699 processIdentifier.pid = pid;
700 processIdentifier.startTimeMillis = startTimeMillis;
701 processIdentifiers.push_back(processIdentifier);
702 }
703 }
704 }
705 for (const ClientInfo*& clientInfo : clientsToNotify) {
706 clientInfo->prepareProcessTermination();
707 }
708 return dumpAndKillAllProcesses(processIdentifiers, /*reportToVhal=*/true);
709 }
710
dumpAndKillAllProcesses(const std::vector<ProcessIdentifier> & processesNotResponding,bool reportToVhal)711 Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
712 const std::vector<ProcessIdentifier>& processesNotResponding, bool reportToVhal) {
713 size_t size = processesNotResponding.size();
714 if (size == 0) {
715 return {};
716 }
717 std::string pidString = toPidString(processesNotResponding);
718 sp<aawi::ICarWatchdogMonitor> monitor;
719 {
720 Mutex::Autolock lock(mMutex);
721 if (mMonitor == nullptr) {
722 std::string errorMsg =
723 StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
724 pidString.c_str());
725 ALOGW("%s", errorMsg.c_str());
726 return Error() << errorMsg;
727 }
728 monitor = mMonitor;
729 }
730 if (isSystemShuttingDown()) {
731 ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
732 pidString.c_str());
733 return {};
734 }
735 if (reportToVhal) {
736 reportTerminatedProcessToVhal(processesNotResponding);
737 }
738 monitor->onClientsNotResponding(processesNotResponding);
739 if (DEBUG) {
740 ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
741 }
742 return {};
743 }
744
745 // Handle when car watchdog clients die.
handleBinderDeath(const wp<IBinder> & who)746 void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
747 Mutex::Autolock lock(mMutex);
748 IBinder* binder = who.unsafe_get();
749 // Check if dead binder is monitor.
750 sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
751 if (monitor == binder) {
752 mMonitor.clear();
753 ALOGW("The monitor has died.");
754 return;
755 }
756 findClientAndProcessLocked(kTimeouts, who.promote(),
757 [&](std::vector<ClientInfo>& clients,
758 std::vector<ClientInfo>::const_iterator it) {
759 ALOGW("Client(pid: %d) died", it->pid);
760 clients.erase(it);
761 });
762 }
763
764 // Handle when VHAL dies.
handleVhalDeath()765 void WatchdogProcessService::handleVhalDeath() {
766 Mutex::Autolock lock(mMutex);
767 ALOGW("VHAL has died.");
768 mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
769 // Destroying mVHalService would remove all onBinderDied callbacks.
770 mVhalService.reset();
771 }
772
reportWatchdogAliveToVhal()773 void WatchdogProcessService::reportWatchdogAliveToVhal() {
774 if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
775 ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
776 return;
777 }
778 int64_t systemUptime = uptimeMillis();
779 VehiclePropValue propValue{
780 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
781 .value.int64Values = {systemUptime},
782 };
783 const auto& ret = updateVhal(propValue);
784 if (!ret.ok()) {
785 ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s, error: %s",
786 ret.error().message().c_str());
787 }
788 // Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
789 auto durationNs = getTimeoutDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
790 mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
791 mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
792 Message(MSG_VHAL_WATCHDOG_ALIVE));
793 }
794
reportTerminatedProcessToVhal(const std::vector<ProcessIdentifier> & processesNotResponding)795 void WatchdogProcessService::reportTerminatedProcessToVhal(
796 const std::vector<ProcessIdentifier>& processesNotResponding) {
797 if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
798 ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
799 "reported to VHAL.");
800 return;
801 }
802 for (auto&& processIdentifier : processesNotResponding) {
803 const auto& retCmdLine = readProcCmdLine(processIdentifier.pid);
804 if (!retCmdLine.ok()) {
805 ALOGW("Failed to get process command line for pid(%d): %s", processIdentifier.pid,
806 retCmdLine.error().message().c_str());
807 continue;
808 }
809 std::string procCmdLine = retCmdLine.value();
810 VehiclePropValue propValue{
811 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
812 .value.int32Values = {static_cast<int32_t>(
813 ProcessTerminationReason::NOT_RESPONDING)},
814 .value.stringValue = procCmdLine,
815 };
816 const auto& retUpdate = updateVhal(propValue);
817 if (!retUpdate.ok()) {
818 ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
819 procCmdLine.c_str());
820 }
821 }
822 }
823
updateVhal(const VehiclePropValue & value)824 Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
825 Mutex::Autolock lock(mMutex);
826 const auto& connectRet = connectToVhalLocked();
827 if (!connectRet.ok()) {
828 std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
829 ALOGW("%s", errorMsg.c_str());
830 return Error() << errorMsg;
831 }
832 int32_t propId = value.prop;
833 if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(propId)) > 0) {
834 std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", propId);
835 ALOGW("%s", errorMsg.c_str());
836 return Error() << errorMsg;
837 }
838
839 auto halPropValue = mVhalService->createHalPropValue(propId);
840 halPropValue->setInt32Values(value.value.int32Values);
841 halPropValue->setInt64Values(value.value.int64Values);
842 halPropValue->setStringValue(value.value.stringValue);
843 if (auto result = mVhalService->setValueSync(*halPropValue); !result.ok()) {
844 return Error() << "Failed to set propValue(" << propId
845 << ") to VHAL, error: " << result.error().message();
846 }
847
848 return {};
849 }
850
readProcCmdLine(int32_t pid)851 Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
852 std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
853 std::string procCmdLine;
854 if (ReadFileToString(cmdLinePath, &procCmdLine)) {
855 std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
856 procCmdLine = Trim(procCmdLine);
857 return procCmdLine;
858 }
859 return Error() << "Failed to read " << cmdLinePath;
860 }
861
connectToVhalLocked()862 Result<void> WatchdogProcessService::connectToVhalLocked() {
863 if (mVhalService != nullptr) {
864 return {};
865 }
866 mVhalService = IVhalClient::tryCreate();
867 if (mVhalService == nullptr) {
868 return Error() << "Failed to connect to VHAL.";
869 }
870 mVhalService->addOnBinderDiedCallback(mOnBinderDiedCallback);
871 queryVhalPropertiesLocked();
872 subscribeToVhalHeartBeatLocked();
873 ALOGI("Successfully connected to VHAL.");
874 return {};
875 }
876
queryVhalPropertiesLocked()877 void WatchdogProcessService::queryVhalPropertiesLocked() {
878 mNotSupportedVhalProperties.clear();
879 std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
880 VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
881 VehicleProperty::VHAL_HEARTBEAT};
882 for (const auto& propId : propIds) {
883 if (!isVhalPropertySupportedLocked(propId)) {
884 mNotSupportedVhalProperties.insert(propId);
885 }
886 }
887 }
888
isVhalPropertySupportedLocked(VehicleProperty propId)889 bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
890 auto result = mVhalService->getPropConfigs({static_cast<int32_t>(propId)});
891 return result.ok();
892 }
893
subscribeToVhalHeartBeatLocked()894 void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
895 if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
896 ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
897 return;
898 }
899
900 mVhalHeartBeat = {
901 .eventTime = 0,
902 .value = 0,
903 };
904
905 std::vector<SubscribeOptions> options = {
906 {.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT), .areaIds = {}},
907 };
908 if (auto result =
909 mVhalService->getSubscriptionClient(mPropertyChangeListener)->subscribe(options);
910 !result.ok()) {
911 ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled. '%s'",
912 result.error().message().c_str());
913 return;
914 }
915 std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
916 mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
917 Message(MSG_VHAL_HEALTH_CHECK));
918 // VHAL process identifier is required only when termiating the VHAL process. VHAL process is
919 // terminated only when the VHAL is unhealthy. However, caching the process identifier as soon
920 // as connecting to VHAL guarantees the correct PID is cached. Because the VHAL pid is queried
921 // from the service manager, the caching should be performed outside the class level lock. So,
922 // handle the caching in the handler thread after successfully subscribing to the VHAL_HEARTBEAT
923 // property.
924 mHandlerLooper->sendMessage(mMessageHandler, Message(MSG_CACHE_VHAL_PROCESS_IDENTIFIER));
925 return;
926 }
927
cacheVhalProcessIdentifier()928 bool WatchdogProcessService::cacheVhalProcessIdentifier() {
929 pid_t pid = -1;
930 if (Result<pid_t> hidlResult = queryHidlServiceManagerForVhalPid(); hidlResult.ok()) {
931 pid = *hidlResult;
932 ALOGI("Fetched HIDL VHAL PID %d", pid);
933 } else if (Result<pid_t> aidlResult = queryAidlServiceManagerForVhalPid(); aidlResult.ok()) {
934 pid = *aidlResult;
935 ALOGI("Fetched AIDL VHAL PID %d", pid);
936 } else {
937 ALOGE("Failed to fetch VHAL pid:\n\t%s\n\t%s", hidlResult.error().message().c_str(),
938 aidlResult.error().message().c_str());
939 return false;
940 }
941 ProcessIdentifier processIdentifier;
942 processIdentifier.pid = pid;
943 processIdentifier.startTimeMillis = mGetStartTimeForPidFunc(pid);
944
945 Mutex::Autolock lock(mMutex);
946 mVhalProcessIdentifier = processIdentifier;
947 return true;
948 }
949
getNewSessionId()950 int32_t WatchdogProcessService::getNewSessionId() {
951 // Make sure that session id is always positive number.
952 if (++mLastSessionId <= 0) {
953 mLastSessionId = 1;
954 }
955 return mLastSessionId;
956 }
957
updateVhalHeartBeat(int64_t value)958 void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
959 bool wrongHeartBeat;
960 {
961 Mutex::Autolock lock(mMutex);
962 if (!mIsEnabled) {
963 return;
964 }
965 wrongHeartBeat = value <= mVhalHeartBeat.value;
966 mVhalHeartBeat.eventTime = uptimeMillis();
967 mVhalHeartBeat.value = value;
968 }
969 if (wrongHeartBeat) {
970 ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
971 terminateVhal();
972 return;
973 }
974 std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
975 mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
976 Message(MSG_VHAL_HEALTH_CHECK));
977 }
978
checkVhalHealth()979 void WatchdogProcessService::checkVhalHealth() {
980 int64_t lastEventTime;
981 int64_t currentUptime = uptimeMillis();
982 {
983 Mutex::Autolock lock(mMutex);
984 if (mVhalService == nullptr || !mIsEnabled) {
985 return;
986 }
987 lastEventTime = mVhalHeartBeat.eventTime;
988 }
989 if (currentUptime > lastEventTime + mVhalHealthCheckWindowMs.count()) {
990 ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
991 terminateVhal();
992 }
993 }
994
terminateVhal()995 void WatchdogProcessService::terminateVhal() {
996 auto maybeDumpAndKillVhalProcess = [&]() -> bool {
997 std::optional<ProcessIdentifier> processIdentifier;
998 {
999 Mutex::Autolock lock(mMutex);
1000 processIdentifier = mVhalProcessIdentifier;
1001 }
1002 if (!processIdentifier.has_value()) {
1003 return false;
1004 }
1005 dumpAndKillAllProcesses(std::vector<ProcessIdentifier>(1, *processIdentifier),
1006 /*reportToVhal=*/false);
1007 return true;
1008 };
1009 if (maybeDumpAndKillVhalProcess()) {
1010 return;
1011 }
1012 if (!cacheVhalProcessIdentifier() || !maybeDumpAndKillVhalProcess()) {
1013 ALOGE("Failed to termitate VHAL: failed to fetch VHAL PID");
1014 }
1015 }
1016
getTimeoutDurationNs(const TimeoutLength & timeout)1017 std::chrono::nanoseconds WatchdogProcessService::getTimeoutDurationNs(
1018 const TimeoutLength& timeout) {
1019 // When a default timeout has been overridden by the |kPropertyClientCheckInterval| read-only
1020 // property override the timeout value for all timeout lengths.
1021 if (mOverriddenClientHealthCheckWindowNs.has_value()) {
1022 return mOverriddenClientHealthCheckWindowNs.value();
1023 }
1024 switch (timeout) {
1025 case TimeoutLength::TIMEOUT_CRITICAL:
1026 return 3s; // 3s and no buffer time.
1027 case TimeoutLength::TIMEOUT_MODERATE:
1028 return 6s; // 5s + 1s as buffer time.
1029 case TimeoutLength::TIMEOUT_NORMAL:
1030 return 12s; // 10s + 2s as buffer time.
1031 }
1032 }
1033
toString() const1034 std::string WatchdogProcessService::ClientInfo::toString() const {
1035 std::string buffer;
1036 StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
1037 type == ClientType::Regular ? "regular" : "watchdog service");
1038 return buffer;
1039 }
1040
getBinder() const1041 sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
1042 if (type == ClientType::Regular) {
1043 return BnCarWatchdogClient::asBinder(client);
1044 }
1045 return watchdogServiceBinder;
1046 }
1047
linkToDeath(const sp<IBinder::DeathRecipient> & recipient) const1048 status_t WatchdogProcessService::ClientInfo::linkToDeath(
1049 const sp<IBinder::DeathRecipient>& recipient) const {
1050 if (type == ClientType::Regular) {
1051 return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
1052 }
1053 // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
1054 // skip this step.
1055 return OK;
1056 }
1057
unlinkToDeath(const wp<IBinder::DeathRecipient> & recipient) const1058 status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
1059 const wp<IBinder::DeathRecipient>& recipient) const {
1060 if (type == ClientType::Regular) {
1061 return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
1062 }
1063 // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
1064 // skip this step.
1065 return OK;
1066 }
1067
checkIfAlive(TimeoutLength timeout) const1068 Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
1069 if (type == ClientType::Regular) {
1070 return client->checkIfAlive(sessionId, timeout);
1071 }
1072 return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
1073 }
1074
prepareProcessTermination() const1075 Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
1076 if (type == ClientType::Regular) {
1077 return client->prepareProcessTermination();
1078 }
1079 return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
1080 }
1081
BinderDeathRecipient(const sp<WatchdogProcessService> & service)1082 WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
1083 const sp<WatchdogProcessService>& service) :
1084 mService(service) {}
1085
binderDied(const wp<IBinder> & who)1086 void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
1087 mService->handleBinderDeath(who);
1088 }
1089
PropertyChangeListener(const sp<WatchdogProcessService> & service)1090 WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
1091 const sp<WatchdogProcessService>& service) :
1092 mService(service) {}
1093
onPropertyEvent(const std::vector<std::unique_ptr<IHalPropValue>> & propValues)1094 void WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
1095 const std::vector<std::unique_ptr<IHalPropValue>>& propValues) {
1096 for (const auto& value : propValues) {
1097 if (value->getPropId() == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
1098 if (value->getInt64Values().size() < 1) {
1099 ALOGE("Invalid VHAL_HEARTBEAT value, empty value");
1100 } else {
1101 mService->updateVhalHeartBeat(value->getInt64Values()[0]);
1102 }
1103 break;
1104 }
1105 }
1106 }
1107
onPropertySetError(const std::vector<HalPropError> & errors)1108 void WatchdogProcessService::PropertyChangeListener::onPropertySetError(
1109 const std::vector<HalPropError>& errors) {
1110 for (const auto& error : errors) {
1111 if (error.propId != static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE) &&
1112 error.propId != static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS)) {
1113 continue;
1114 }
1115 ALOGE("failed to set VHAL property, prop ID: %d, status: %d", error.propId,
1116 static_cast<int32_t>(error.status));
1117 }
1118 }
1119
MessageHandlerImpl(const sp<WatchdogProcessService> & service)1120 WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
1121 const sp<WatchdogProcessService>& service) :
1122 mService(service) {}
1123
handleMessage(const Message & message)1124 void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
1125 switch (message.what) {
1126 case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
1127 case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
1128 case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
1129 mService->doHealthCheck(message.what);
1130 break;
1131 case MSG_VHAL_WATCHDOG_ALIVE:
1132 mService->reportWatchdogAliveToVhal();
1133 break;
1134 case MSG_VHAL_HEALTH_CHECK:
1135 mService->checkVhalHealth();
1136 break;
1137 case MSG_CACHE_VHAL_PROCESS_IDENTIFIER:
1138 mService->cacheVhalProcessIdentifier();
1139 break;
1140 default:
1141 ALOGW("Unknown message: %d", message.what);
1142 }
1143 }
1144
1145 } // namespace watchdog
1146 } // namespace automotive
1147 } // namespace android
1148