• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogProcessService.h"
21 
22 #include "UidProcStatsCollector.h"
23 #include "WatchdogServiceHelper.h"
24 
25 #include <aidl/android/hardware/automotive/vehicle/BnVehicle.h>
26 #include <aidl/android/hardware/automotive/vehicle/ProcessTerminationReason.h>
27 #include <android-base/file.h>
28 #include <android-base/macros.h>
29 #include <android-base/properties.h>
30 #include <android-base/stringprintf.h>
31 #include <android-base/strings.h>
32 #include <android/automotive/watchdog/BnCarWatchdogClient.h>
33 #include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
34 #include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
35 #include <android/hidl/manager/1.0/IServiceManager.h>
36 #include <binder/IPCThreadState.h>
37 #include <binder/IServiceManager.h>
38 #include <hidl/HidlTransportSupport.h>
39 #include <utils/SystemClock.h>
40 
41 #include <IVhalClient.h>
42 #include <VehicleHalTypes.h>
43 
44 #include <utility>
45 
46 namespace android {
47 namespace automotive {
48 namespace watchdog {
49 
50 namespace aawi = ::android::automotive::watchdog::internal;
51 
52 using aawi::BnCarWatchdogServiceForSystem;
53 using aawi::ICarWatchdogServiceForSystem;
54 using aawi::ProcessIdentifier;
55 using ::aidl::android::hardware::automotive::vehicle::BnVehicle;
56 using ::aidl::android::hardware::automotive::vehicle::ProcessTerminationReason;
57 using ::aidl::android::hardware::automotive::vehicle::StatusCode;
58 using ::aidl::android::hardware::automotive::vehicle::SubscribeOptions;
59 using ::aidl::android::hardware::automotive::vehicle::VehiclePropConfig;
60 using ::aidl::android::hardware::automotive::vehicle::VehicleProperty;
61 using ::aidl::android::hardware::automotive::vehicle::VehiclePropertyStatus;
62 using ::aidl::android::hardware::automotive::vehicle::VehiclePropValue;
63 using ::android::IBinder;
64 using ::android::sp;
65 using ::android::String16;
66 using ::android::base::Error;
67 using ::android::base::GetIntProperty;
68 using ::android::base::GetProperty;
69 using ::android::base::ReadFileToString;
70 using ::android::base::Result;
71 using ::android::base::StringAppendF;
72 using ::android::base::StringPrintf;
73 using ::android::base::Trim;
74 using ::android::base::WriteStringToFd;
75 using ::android::binder::Status;
76 using ::android::frameworks::automotive::vhal::HalPropError;
77 using ::android::frameworks::automotive::vhal::IHalPropValue;
78 using ::android::frameworks::automotive::vhal::IVhalClient;
79 using ::android::hardware::hidl_vec;
80 using ::android::hardware::interfacesEqual;
81 using ::android::hardware::Return;
82 using ::android::hidl::base::V1_0::IBase;
83 
84 namespace {
85 
// Every timeout category handled by this service. The constructor creates one client list and one
// pinged-client map per entry, and lookups that must cover all clients iterate over this list.
const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
                                              TimeoutLength::TIMEOUT_MODERATE,
                                              TimeoutLength::TIMEOUT_NORMAL};

// TimeoutLength is also used as a message ID. Other message IDs should start next to
// TimeoutLength::TIMEOUT_NORMAL.
const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
const int32_t MSG_CACHE_VHAL_PROCESS_IDENTIFIER = MSG_VHAL_HEALTH_CHECK + 1;

// VHAL is supposed to send heart beat every 3s. Car watchdog checks if there is the latest heart
// beat from VHAL within 3s, allowing 1s marginal time.
// If {@code ro.carwatchdog.vhal_healthcheck.interval} is set, car watchdog checks VHAL health at
// the given interval. The lower bound of the interval is 3s.
constexpr int32_t kDefaultVhalCheckIntervalSec = 3;
// Margin added on top of the VHAL health check window before the check message is delivered.
constexpr std::chrono::milliseconds kHealthCheckDelayMs = 1s;

// Default passed to GetIntProperty to detect that an integer system property is not set.
constexpr int32_t kMissingIntPropertyValue = -1;

// System property names read by the constructor.
constexpr const char kPropertyVhalCheckInterval[] = "ro.carwatchdog.vhal_healthcheck.interval";
constexpr const char kPropertyClientCheckInterval[] = "ro.carwatchdog.client_healthcheck.interval";
// Human-readable service name used in log and dump output.
constexpr const char kServiceName[] = "WatchdogProcessService";
// Names used to locate the VHAL entry in the HIDL and AIDL service manager debug listings.
constexpr const char kHidlVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
constexpr const char kAidlVhalInterfaceName[] =
        "android.hardware.automotive.vehicle.IVehicle/default";
111 
toPidString(const std::vector<ProcessIdentifier> & processIdentifiers)112 std::string toPidString(const std::vector<ProcessIdentifier>& processIdentifiers) {
113     size_t size = processIdentifiers.size();
114     if (size == 0) {
115         return "";
116     }
117     std::string buffer;
118     StringAppendF(&buffer, "%d", processIdentifiers[0].pid);
119     for (size_t i = 1; i < size; i++) {
120         StringAppendF(&buffer, ", %d", processIdentifiers[i].pid);
121     }
122     return buffer;
123 }
124 
isSystemShuttingDown()125 bool isSystemShuttingDown() {
126     std::string sysPowerCtl;
127     std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
128     std::getline(tokenStream, sysPowerCtl, ',');
129     return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
130 }
131 
getStartTimeForPid(pid_t pid)132 int64_t getStartTimeForPid(pid_t pid) {
133     auto pidStat = UidProcStatsCollector::readStatFileForPid(pid);
134     if (!pidStat.ok()) {
135         return elapsedRealtime();
136     }
137     return pidStat->startTimeMillis;
138 }
139 
queryHidlServiceManagerForVhalPid()140 Result<pid_t> queryHidlServiceManagerForVhalPid() {
141     using android::hidl::manager::V1_0::IServiceManager;
142     pid_t pid = -1;
143     Return<void> ret = IServiceManager::getService()->debugDump([&](auto& hals) {
144         for (const auto& info : hals) {
145             if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
146                 continue;
147             }
148             if (info.interfaceName == kHidlVhalInterfaceName) {
149                 pid = info.pid;
150                 return;
151             }
152         }
153     });
154 
155     if (!ret.isOk()) {
156         return Error() << "Failed to get VHAL process id from HIDL service manager";
157     }
158     if (pid == -1) {
159         return Error() << "No VHAL service registered to HIDL service manager";
160     }
161     return pid;
162 }
163 
queryAidlServiceManagerForVhalPid()164 Result<pid_t> queryAidlServiceManagerForVhalPid() {
165     using ServiceDebugInfo = android::IServiceManager::ServiceDebugInfo;
166     std::vector<ServiceDebugInfo> serviceDebugInfos =
167             defaultServiceManager()->getServiceDebugInfo();
168     for (const auto& serviceDebugInfo : serviceDebugInfos) {
169         if (serviceDebugInfo.name == kAidlVhalInterfaceName) {
170             return serviceDebugInfo.pid;
171         }
172     }
173     return Error() << "No VHAL service registered to AIDL service manager";
174 }
175 
176 }  // namespace
177 
WatchdogProcessService(const sp<Looper> & handlerLooper)178 WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
179       mHandlerLooper(handlerLooper),
180       mLastSessionId(0),
181       mServiceStarted(false),
182       mIsEnabled(true),
183       mVhalService(nullptr) {
184     mOnBinderDiedCallback =
185             std::make_shared<IVhalClient::OnBinderDiedCallbackFunc>([this] { handleVhalDeath(); });
186     for (const auto& timeout : kTimeouts) {
187         mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
188         mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
189     }
190 
191     int32_t vhalHealthCheckIntervalSec =
192             GetIntProperty(kPropertyVhalCheckInterval, kDefaultVhalCheckIntervalSec);
193     vhalHealthCheckIntervalSec = std::max(vhalHealthCheckIntervalSec, kDefaultVhalCheckIntervalSec);
194     mVhalHealthCheckWindowMs = std::chrono::seconds(vhalHealthCheckIntervalSec);
195 
196     int32_t clientHealthCheckIntervalSec =
197             GetIntProperty(kPropertyClientCheckInterval, kMissingIntPropertyValue);
198     // Overridden timeout value must be greater than or equal to the maximum possible timeout value.
199     // Otherwise, clients will be pinged more frequently than the guaranteed timeout duration.
200     if (clientHealthCheckIntervalSec != kMissingIntPropertyValue) {
201         int32_t normalSec = std::chrono::duration_cast<std::chrono::seconds>(
202                                     getTimeoutDurationNs(TimeoutLength::TIMEOUT_NORMAL))
203                                     .count();
204         mOverriddenClientHealthCheckWindowNs = std::optional<std::chrono::seconds>{
205                 std::max(clientHealthCheckIntervalSec, normalSec)};
206     }
207 
208     mGetStartTimeForPidFunc = &getStartTimeForPid;
209 }
210 
registerWatchdogServiceHelper(const sp<WatchdogServiceHelperInterface> & helper)211 Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
212         const sp<WatchdogServiceHelperInterface>& helper) {
213     if (helper == nullptr) {
214         return Error() << "Must provide a non-null watchdog service helper instance";
215     }
216     Mutex::Autolock lock(mMutex);
217     mWatchdogServiceHelper = helper;
218     return {};
219 }
220 
registerClient(const sp<ICarWatchdogClient> & client,TimeoutLength timeout)221 Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
222                                               TimeoutLength timeout) {
223     if (client == nullptr) {
224         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
225                                          "Must provide non-null client");
226     }
227     pid_t callingPid = IPCThreadState::self()->getCallingPid();
228     uid_t callingUid = IPCThreadState::self()->getCallingUid();
229 
230     ClientInfo clientInfo(client, callingPid, callingUid, mGetStartTimeForPidFunc(callingPid));
231     return registerClient(clientInfo, timeout);
232 }
233 
unregisterClient(const sp<ICarWatchdogClient> & client)234 Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
235     if (client == nullptr) {
236         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
237                                          "Must provide non-null client");
238     }
239     Mutex::Autolock lock(mMutex);
240     sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
241     // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
242     // MODERATE, NORMAL).
243     return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
244 }
245 
registerCarWatchdogService(const sp<IBinder> & binder)246 Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
247     pid_t callingPid = IPCThreadState::self()->getCallingPid();
248     uid_t callingUid = IPCThreadState::self()->getCallingUid();
249 
250     sp<WatchdogServiceHelperInterface> helper;
251     {
252         Mutex::Autolock lock(mMutex);
253         if (mWatchdogServiceHelper == nullptr) {
254             return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
255                                              "Watchdog service helper instance is null");
256         }
257         helper = mWatchdogServiceHelper;
258     }
259 
260     ClientInfo clientInfo(helper, binder, callingPid, callingUid,
261                           mGetStartTimeForPidFunc(callingPid));
262     return registerClient(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
263 }
264 
unregisterCarWatchdogService(const sp<IBinder> & binder)265 void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
266     Mutex::Autolock lock(mMutex);
267 
268     std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
269     unregisterClientLocked(timeouts, binder, ClientType::Service);
270 }
271 
registerMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)272 Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
273     if (monitor == nullptr) {
274         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
275                                          "Must provide non-null monitor");
276     }
277     sp<BinderDeathRecipient> binderDeathRecipient;
278     sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
279     {
280         Mutex::Autolock lock(mMutex);
281         if (mBinderDeathRecipient == nullptr) {
282             return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
283                                              "Service is not initialized");
284         }
285         if (mMonitor != nullptr) {
286             if (binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
287                 return Status::ok();
288             }
289             aawi::BnCarWatchdogMonitor::asBinder(mMonitor)->unlinkToDeath(mBinderDeathRecipient);
290         }
291         mMonitor = monitor;
292         binderDeathRecipient = mBinderDeathRecipient;
293     }
294     if (status_t ret = binder->linkToDeath(binderDeathRecipient); ret != OK) {
295         {
296             Mutex::Autolock lock(mMutex);
297             if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
298                 mMonitor.clear();
299             }
300         }
301         ALOGW("Failed to register the monitor as it is dead.");
302         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
303     }
304     if (DEBUG) {
305         ALOGD("Car watchdog monitor is registered");
306     }
307     return Status::ok();
308 }
309 
unregisterMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)310 Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
311     if (monitor == nullptr) {
312         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
313                                          "Must provide non-null monitor");
314     }
315     Mutex::Autolock lock(mMutex);
316     if (mBinderDeathRecipient == nullptr) {
317         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "Service is not initialized");
318     }
319     sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
320     sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
321     if (curBinder != newBinder) {
322         ALOGW("Failed to unregister the monitor as it has not been registered.");
323         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
324                                          "The monitor has not been registered.");
325     }
326     curBinder->unlinkToDeath(mBinderDeathRecipient);
327     mMonitor.clear();
328     if (DEBUG) {
329         ALOGD("Car watchdog monitor is unregistered");
330     }
331     return Status::ok();
332 }
333 
tellClientAlive(const sp<ICarWatchdogClient> & client,int32_t sessionId)334 Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
335                                                int32_t sessionId) {
336     Mutex::Autolock lock(mMutex);
337     return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
338 }
339 
tellCarWatchdogServiceAlive(const sp<ICarWatchdogServiceForSystem> & service,const std::vector<ProcessIdentifier> & clientsNotResponding,int32_t sessionId)340 Status WatchdogProcessService::tellCarWatchdogServiceAlive(
341         const sp<ICarWatchdogServiceForSystem>& service,
342         const std::vector<ProcessIdentifier>& clientsNotResponding, int32_t sessionId) {
343     Status status;
344     {
345         Mutex::Autolock lock(mMutex);
346         if (DEBUG) {
347             if (clientsNotResponding.size() > 0) {
348                 ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
349                       sessionId, toPidString(clientsNotResponding).c_str());
350             }
351         }
352         status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
353     }
354     if (status.isOk()) {
355         dumpAndKillAllProcesses(clientsNotResponding, /*reportToVhal=*/true);
356     }
357     return status;
358 }
359 
tellDumpFinished(const sp<aawi::ICarWatchdogMonitor> & monitor,const ProcessIdentifier & processIdentifier)360 Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
361                                                 const ProcessIdentifier& processIdentifier) {
362     Mutex::Autolock lock(mMutex);
363     if (mMonitor == nullptr || monitor == nullptr ||
364         aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
365                 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
366         return Status::
367                 fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
368                                   "The monitor is not registered or an invalid monitor is given");
369     }
370     ALOGI("Process(pid: %d) has been dumped and killed", processIdentifier.pid);
371     return Status::ok();
372 }
373 
setEnabled(bool isEnabled)374 void WatchdogProcessService::setEnabled(bool isEnabled) {
375     Mutex::Autolock lock(mMutex);
376     if (mIsEnabled == isEnabled) {
377         return;
378     }
379     ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
380     mIsEnabled = isEnabled;
381     mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
382     if (!mIsEnabled) {
383         return;
384     }
385     if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) == 0) {
386         mVhalHeartBeat.eventTime = uptimeMillis();
387         std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
388         mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
389                                            Message(MSG_VHAL_HEALTH_CHECK));
390     }
391     for (const auto& timeout : kTimeouts) {
392         mHandlerLooper->removeMessages(mMessageHandler, static_cast<int>(timeout));
393         startHealthCheckingLocked(timeout);
394     }
395 }
396 
onUserStateChange(userid_t userId,bool isStarted)397 void WatchdogProcessService::onUserStateChange(userid_t userId, bool isStarted) {
398     std::string buffer;
399     Mutex::Autolock lock(mMutex);
400     if (isStarted) {
401         mStoppedUserIds.erase(userId);
402     } else {
403         mStoppedUserIds.insert(userId);
404     }
405 }
406 
dump(int fd,const Vector<String16> &)407 Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
408     Mutex::Autolock lock(mMutex);
409     const char* indent = "  ";
410     const char* doubleIndent = "    ";
411     std::string buffer;
412     WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
413     WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
414                                  mIsEnabled ? "true" : "false"),
415                     fd);
416     WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
417     int count = 1;
418     for (const auto& timeout : kTimeouts) {
419         std::vector<ClientInfo>& clients = mClients[timeout];
420         for (auto it = clients.begin(); it != clients.end(); it++, count++) {
421             WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
422                                          it->toString().c_str()),
423                             fd);
424         }
425     }
426     WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
427                                  mMonitor == nullptr ? "false" : "true"),
428                     fd);
429     WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
430                                  isSystemShuttingDown() ? "true" : "false"),
431                     fd);
432     buffer = "none";
433     bool first = true;
434     for (const auto& userId : mStoppedUserIds) {
435         if (first) {
436             buffer = StringPrintf("%d", userId);
437             first = false;
438         } else {
439             StringAppendF(&buffer, ", %d", userId);
440         }
441     }
442     WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
443     WriteStringToFd(StringPrintf("%sVHAL health check interval: %lldms\n", indent,
444                                  mVhalHealthCheckWindowMs.count()),
445                     fd);
446     if (mVhalProcessIdentifier.has_value()) {
447         WriteStringToFd(StringPrintf("%sVHAL process identifier (PID = %d, Start time millis = "
448                                      "%" PRIi64 ")",
449                                      indent, mVhalProcessIdentifier->pid,
450                                      mVhalProcessIdentifier->startTimeMillis),
451                         fd);
452     }
453     return {};
454 }
455 
// Runs one health check round for the timeout encoded in the given message ID.
//
// Clients still waiting from the previous round are handled by
// dumpAndKillClientsIfNotResponding(). Then every registered client of this timeout is pinged, and
// the next round is scheduled when at least one client is registered. Does nothing beyond removing
// pending messages when the service is disabled.
void WatchdogProcessService::doHealthCheck(int what) {
    mHandlerLooper->removeMessages(mMessageHandler, what);
    {
        Mutex::Autolock lock(mMutex);
        if (!mIsEnabled) {
            return;
        }
    }
    const TimeoutLength timeout = static_cast<TimeoutLength>(what);
    dumpAndKillClientsIfNotResponding(timeout);

    /* Generates a temporary/local vector containing clients.
     * Using a local copy may send unnecessary ping messages to clients after they are unregistered.
     * Clients should be able to handle them.
     */
    std::vector<ClientInfo> snapshot;
    PingedClientMap& pinged = mPingedClients[timeout];
    {
        Mutex::Autolock lock(mMutex);
        pinged.clear();
        snapshot = mClients[timeout];
        for (auto& client : snapshot) {
            // Clients of stopped users are not tracked as pinged in this round.
            if (mStoppedUserIds.count(client.userId) == 0) {
                const int sessionId = getNewSessionId();
                client.sessionId = sessionId;
                pinged.insert(std::make_pair(sessionId, client));
            }
        }
    }

    for (const auto& client : snapshot) {
        const Status pingStatus = client.checkIfAlive(timeout);
        if (pingStatus.isOk()) {
            continue;
        }
        ALOGW("Sending a ping message to client(pid: %d) failed: %s", client.pid,
              pingStatus.exceptionMessage().c_str());
        // A client that could not be pinged is not expected to respond.
        Mutex::Autolock lock(mMutex);
        pinged.erase(client.sessionId);
    }
    // Though the size of pingedClients is a more specific measure, clientsToCheck is used as a
    // conservative approach.
    if (!snapshot.empty()) {
        const auto timeoutNs = getTimeoutDurationNs(timeout);
        mHandlerLooper->sendMessageDelayed(timeoutNs.count(), mMessageHandler, Message(what));
    }
}
502 
start()503 Result<void> WatchdogProcessService::start() {
504     {
505         Mutex::Autolock lock(mMutex);
506         if (mServiceStarted) {
507             return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
508         }
509         auto thiz = sp<WatchdogProcessService>::fromExisting(this);
510         mMessageHandler = sp<MessageHandlerImpl>::make(thiz);
511         mBinderDeathRecipient = sp<BinderDeathRecipient>::make(thiz);
512         mPropertyChangeListener = std::make_shared<PropertyChangeListener>(thiz);
513         mServiceStarted = true;
514     }
515     reportWatchdogAliveToVhal();
516     return {};
517 }
518 
// Stops the service. Does nothing when the service has not been started.
//
// Unlinks and drops every registered client, releases the watchdog service helper, unlinks the
// monitor from the death recipient, removes the pending VHAL health check message and, when
// connected to VHAL, unsubscribes from VHAL_HEARTBEAT (if supported) and releases the VHAL client.
void WatchdogProcessService::terminate() {
    Mutex::Autolock lock(mMutex);
    if (!mServiceStarted) {
        return;
    }
    for (const auto& timeout : kTimeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        for (const auto& client : clients) {
            client.unlinkToDeath(mBinderDeathRecipient);
        }
        clients.clear();
    }
    mWatchdogServiceHelper.clear();
    if (mMonitor != nullptr) {
        aawi::BnCarWatchdogMonitor::asBinder(mMonitor)->unlinkToDeath(mBinderDeathRecipient);
    }
    mBinderDeathRecipient.clear();
    mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
    mServiceStarted = false;
    if (mVhalService == nullptr) {
        return;
    }
    const bool isHeartBeatSupported =
            mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) == 0;
    if (isHeartBeatSupported) {
        std::vector<int32_t> heartBeatPropIds = {
                static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)};
        auto unsubscribeResult = mVhalService->getSubscriptionClient(mPropertyChangeListener)
                                         ->unsubscribe(heartBeatPropIds);
        if (!unsubscribeResult.ok()) {
            ALOGW("Failed to unsubscribe from VHAL_HEARTBEAT.");
        }
    }
    mVhalService->removeOnBinderDiedCallback(mOnBinderDiedCallback);
    mVhalService.reset();
}
553 
registerClient(const ClientInfo & clientInfo,TimeoutLength timeout)554 Status WatchdogProcessService::registerClient(const ClientInfo& clientInfo, TimeoutLength timeout) {
555     sp<BinderDeathRecipient> binderDeathRecipient;
556     {
557         Mutex::Autolock lock(mMutex);
558         if (mBinderDeathRecipient == nullptr) {
559             return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
560                                              "Service is not initialized");
561         }
562         if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
563             ALOGW("Failed to register (%s) as it is already registered.",
564                   clientInfo.toString().c_str());
565             return Status::ok();
566         }
567         std::vector<ClientInfo>& clients = mClients[timeout];
568         clients.emplace_back(clientInfo);
569         binderDeathRecipient = mBinderDeathRecipient;
570     }
571     if (status_t status = clientInfo.linkToDeath(binderDeathRecipient); status != OK) {
572         Mutex::Autolock lock(mMutex);
573         std::vector<TimeoutLength> timeouts = {timeout};
574         findClientAndProcessLocked(timeouts, clientInfo,
575                                    [&](std::vector<ClientInfo>& clients,
576                                        std::vector<ClientInfo>::const_iterator it) {
577                                        clients.erase(it);
578                                    });
579         ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
580         std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
581         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
582     }
583     if (DEBUG) {
584         ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
585               timeout);
586     }
587     Mutex::Autolock lock(mMutex);
588     // If the client array becomes non-empty, start health checking.
589     if (mClients[timeout].size() == 1) {
590         startHealthCheckingLocked(timeout);
591         ALOGI("Starting health checking for timeout = %d", timeout);
592     }
593     return Status::ok();
594 }
595 
unregisterClientLocked(const std::vector<TimeoutLength> & timeouts,sp<IBinder> binder,ClientType clientType)596 Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
597                                                       sp<IBinder> binder, ClientType clientType) {
598     const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
599     bool result = findClientAndProcessLocked(timeouts, binder,
600                                              [&](std::vector<ClientInfo>& clients,
601                                                  std::vector<ClientInfo>::const_iterator it) {
602                                                  it->unlinkToDeath(mBinderDeathRecipient);
603                                                  clients.erase(it);
604                                              });
605     if (!result) {
606         std::string errorStr = StringPrintf("The %s has not been registered", clientName);
607         const char* errorCause = errorStr.c_str();
608         ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
609         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
610     }
611     if (DEBUG) {
612         ALOGD("Car watchdog %s is unregistered", clientName);
613     }
614     return Status::ok();
615 }
616 
tellClientAliveLocked(const sp<IBinder> & binder,int32_t sessionId)617 Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
618     for (const auto& timeout : kTimeouts) {
619         PingedClientMap& clients = mPingedClients[timeout];
620         PingedClientMap::const_iterator it = clients.find(sessionId);
621         if (it == clients.cend() || !it->second.matchesBinder(binder)) {
622             continue;
623         }
624         clients.erase(it);
625         return Status::ok();
626     }
627     return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
628                                      "The client is not registered or the session ID is not found");
629 }
630 
// Searches the client lists of the given timeouts for the first client equal to clientInfo.
// When found, the optional processor is invoked with the containing list and the position of the
// client, and true is returned. Returns false when no list contains such a client.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const ClientInfo& clientInfo,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto position = candidates.begin();
        while (position != candidates.end() && std::as_const(*position) != clientInfo) {
            position++;
        }
        if (position == candidates.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(candidates, position);
        }
        return true;
    }
    return false;
}
648 
// Searches the client lists of the given timeouts for the first client matching the given binder.
// When found, the optional processor is invoked with the containing list and the position of the
// client, and true is returned. Returns false when no list contains such a client.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const sp<IBinder> binder,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto position = candidates.begin();
        while (position != candidates.end() && !position->matchesBinder(binder)) {
            position++;
        }
        if (position == candidates.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(candidates, position);
        }
        return true;
    }
    return false;
}
666 
startHealthCheckingLocked(TimeoutLength timeout)667 Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
668     PingedClientMap& clients = mPingedClients[timeout];
669     clients.clear();
670     int what = static_cast<int>(timeout);
671     auto durationNs = getTimeoutDurationNs(timeout);
672     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
673     return {};
674 }
675 
dumpAndKillClientsIfNotResponding(TimeoutLength timeout)676 Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
677     std::vector<ProcessIdentifier> processIdentifiers;
678     std::vector<const ClientInfo*> clientsToNotify;
679     {
680         Mutex::Autolock lock(mMutex);
681         PingedClientMap& clients = mPingedClients[timeout];
682         for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
683             pid_t pid = -1;
684             userid_t userId = -1;
685             uint64_t startTimeMillis = 0;
686             std::vector<TimeoutLength> timeouts = {timeout};
687             findClientAndProcessLocked(timeouts, it->second,
688                                        [&](std::vector<ClientInfo>& cachedClients,
689                                            std::vector<ClientInfo>::const_iterator
690                                                    cachedClientsIt) {
691                                            pid = cachedClientsIt->pid;
692                                            startTimeMillis = cachedClientsIt->startTimeMillis;
693                                            userId = cachedClientsIt->userId;
694                                            cachedClients.erase(cachedClientsIt);
695                                        });
696             if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
697                 clientsToNotify.emplace_back(&it->second);
698                 ProcessIdentifier processIdentifier;
699                 processIdentifier.pid = pid;
700                 processIdentifier.startTimeMillis = startTimeMillis;
701                 processIdentifiers.push_back(processIdentifier);
702             }
703         }
704     }
705     for (const ClientInfo*& clientInfo : clientsToNotify) {
706         clientInfo->prepareProcessTermination();
707     }
708     return dumpAndKillAllProcesses(processIdentifiers, /*reportToVhal=*/true);
709 }
710 
dumpAndKillAllProcesses(const std::vector<ProcessIdentifier> & processesNotResponding,bool reportToVhal)711 Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
712         const std::vector<ProcessIdentifier>& processesNotResponding, bool reportToVhal) {
713     size_t size = processesNotResponding.size();
714     if (size == 0) {
715         return {};
716     }
717     std::string pidString = toPidString(processesNotResponding);
718     sp<aawi::ICarWatchdogMonitor> monitor;
719     {
720         Mutex::Autolock lock(mMutex);
721         if (mMonitor == nullptr) {
722             std::string errorMsg =
723                     StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
724                                  pidString.c_str());
725             ALOGW("%s", errorMsg.c_str());
726             return Error() << errorMsg;
727         }
728         monitor = mMonitor;
729     }
730     if (isSystemShuttingDown()) {
731         ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
732               pidString.c_str());
733         return {};
734     }
735     if (reportToVhal) {
736         reportTerminatedProcessToVhal(processesNotResponding);
737     }
738     monitor->onClientsNotResponding(processesNotResponding);
739     if (DEBUG) {
740         ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
741     }
742     return {};
743 }
744 
745 // Handle when car watchdog clients die.
handleBinderDeath(const wp<IBinder> & who)746 void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
747     Mutex::Autolock lock(mMutex);
748     IBinder* binder = who.unsafe_get();
749     // Check if dead binder is monitor.
750     sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
751     if (monitor == binder) {
752         mMonitor.clear();
753         ALOGW("The monitor has died.");
754         return;
755     }
756     findClientAndProcessLocked(kTimeouts, who.promote(),
757                                [&](std::vector<ClientInfo>& clients,
758                                    std::vector<ClientInfo>::const_iterator it) {
759                                    ALOGW("Client(pid: %d) died", it->pid);
760                                    clients.erase(it);
761                                });
762 }
763 
764 // Handle when VHAL dies.
handleVhalDeath()765 void WatchdogProcessService::handleVhalDeath() {
766     Mutex::Autolock lock(mMutex);
767     ALOGW("VHAL has died.");
768     mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_HEALTH_CHECK);
769     // Destroying mVHalService would remove all onBinderDied callbacks.
770     mVhalService.reset();
771 }
772 
reportWatchdogAliveToVhal()773 void WatchdogProcessService::reportWatchdogAliveToVhal() {
774     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
775         ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
776         return;
777     }
778     int64_t systemUptime = uptimeMillis();
779     VehiclePropValue propValue{
780             .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
781             .value.int64Values = {systemUptime},
782     };
783     const auto& ret = updateVhal(propValue);
784     if (!ret.ok()) {
785         ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s, error: %s",
786               ret.error().message().c_str());
787     }
788     // Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
789     auto durationNs = getTimeoutDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
790     mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
791     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
792                                        Message(MSG_VHAL_WATCHDOG_ALIVE));
793 }
794 
reportTerminatedProcessToVhal(const std::vector<ProcessIdentifier> & processesNotResponding)795 void WatchdogProcessService::reportTerminatedProcessToVhal(
796         const std::vector<ProcessIdentifier>& processesNotResponding) {
797     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
798         ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
799               "reported to VHAL.");
800         return;
801     }
802     for (auto&& processIdentifier : processesNotResponding) {
803         const auto& retCmdLine = readProcCmdLine(processIdentifier.pid);
804         if (!retCmdLine.ok()) {
805             ALOGW("Failed to get process command line for pid(%d): %s", processIdentifier.pid,
806                   retCmdLine.error().message().c_str());
807             continue;
808         }
809         std::string procCmdLine = retCmdLine.value();
810         VehiclePropValue propValue{
811                 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
812                 .value.int32Values = {static_cast<int32_t>(
813                         ProcessTerminationReason::NOT_RESPONDING)},
814                 .value.stringValue = procCmdLine,
815         };
816         const auto& retUpdate = updateVhal(propValue);
817         if (!retUpdate.ok()) {
818             ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
819                   procCmdLine.c_str());
820         }
821     }
822 }
823 
updateVhal(const VehiclePropValue & value)824 Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
825     Mutex::Autolock lock(mMutex);
826     const auto& connectRet = connectToVhalLocked();
827     if (!connectRet.ok()) {
828         std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
829         ALOGW("%s", errorMsg.c_str());
830         return Error() << errorMsg;
831     }
832     int32_t propId = value.prop;
833     if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(propId)) > 0) {
834         std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", propId);
835         ALOGW("%s", errorMsg.c_str());
836         return Error() << errorMsg;
837     }
838 
839     auto halPropValue = mVhalService->createHalPropValue(propId);
840     halPropValue->setInt32Values(value.value.int32Values);
841     halPropValue->setInt64Values(value.value.int64Values);
842     halPropValue->setStringValue(value.value.stringValue);
843     if (auto result = mVhalService->setValueSync(*halPropValue); !result.ok()) {
844         return Error() << "Failed to set propValue(" << propId
845                        << ") to VHAL, error: " << result.error().message();
846     }
847 
848     return {};
849 }
850 
readProcCmdLine(int32_t pid)851 Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
852     std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
853     std::string procCmdLine;
854     if (ReadFileToString(cmdLinePath, &procCmdLine)) {
855         std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
856         procCmdLine = Trim(procCmdLine);
857         return procCmdLine;
858     }
859     return Error() << "Failed to read " << cmdLinePath;
860 }
861 
connectToVhalLocked()862 Result<void> WatchdogProcessService::connectToVhalLocked() {
863     if (mVhalService != nullptr) {
864         return {};
865     }
866     mVhalService = IVhalClient::tryCreate();
867     if (mVhalService == nullptr) {
868         return Error() << "Failed to connect to VHAL.";
869     }
870     mVhalService->addOnBinderDiedCallback(mOnBinderDiedCallback);
871     queryVhalPropertiesLocked();
872     subscribeToVhalHeartBeatLocked();
873     ALOGI("Successfully connected to VHAL.");
874     return {};
875 }
876 
queryVhalPropertiesLocked()877 void WatchdogProcessService::queryVhalPropertiesLocked() {
878     mNotSupportedVhalProperties.clear();
879     std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
880                                             VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
881                                             VehicleProperty::VHAL_HEARTBEAT};
882     for (const auto& propId : propIds) {
883         if (!isVhalPropertySupportedLocked(propId)) {
884             mNotSupportedVhalProperties.insert(propId);
885         }
886     }
887 }
888 
isVhalPropertySupportedLocked(VehicleProperty propId)889 bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
890     auto result = mVhalService->getPropConfigs({static_cast<int32_t>(propId)});
891     return result.ok();
892 }
893 
subscribeToVhalHeartBeatLocked()894 void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
895     if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
896         ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
897         return;
898     }
899 
900     mVhalHeartBeat = {
901             .eventTime = 0,
902             .value = 0,
903     };
904 
905     std::vector<SubscribeOptions> options = {
906             {.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT), .areaIds = {}},
907     };
908     if (auto result =
909                 mVhalService->getSubscriptionClient(mPropertyChangeListener)->subscribe(options);
910         !result.ok()) {
911         ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled. '%s'",
912               result.error().message().c_str());
913         return;
914     }
915     std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
916     mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
917                                        Message(MSG_VHAL_HEALTH_CHECK));
918     // VHAL process identifier is required only when termiating the VHAL process. VHAL process is
919     // terminated only when the VHAL is unhealthy. However, caching the process identifier as soon
920     // as connecting to VHAL guarantees the correct PID is cached. Because the VHAL pid is queried
921     // from the service manager, the caching should be performed outside the class level lock. So,
922     // handle the caching in the handler thread after successfully subscribing to the VHAL_HEARTBEAT
923     // property.
924     mHandlerLooper->sendMessage(mMessageHandler, Message(MSG_CACHE_VHAL_PROCESS_IDENTIFIER));
925     return;
926 }
927 
cacheVhalProcessIdentifier()928 bool WatchdogProcessService::cacheVhalProcessIdentifier() {
929     pid_t pid = -1;
930     if (Result<pid_t> hidlResult = queryHidlServiceManagerForVhalPid(); hidlResult.ok()) {
931         pid = *hidlResult;
932         ALOGI("Fetched HIDL VHAL PID %d", pid);
933     } else if (Result<pid_t> aidlResult = queryAidlServiceManagerForVhalPid(); aidlResult.ok()) {
934         pid = *aidlResult;
935         ALOGI("Fetched AIDL VHAL PID %d", pid);
936     } else {
937         ALOGE("Failed to fetch VHAL pid:\n\t%s\n\t%s", hidlResult.error().message().c_str(),
938               aidlResult.error().message().c_str());
939         return false;
940     }
941     ProcessIdentifier processIdentifier;
942     processIdentifier.pid = pid;
943     processIdentifier.startTimeMillis = mGetStartTimeForPidFunc(pid);
944 
945     Mutex::Autolock lock(mMutex);
946     mVhalProcessIdentifier = processIdentifier;
947     return true;
948 }
949 
getNewSessionId()950 int32_t WatchdogProcessService::getNewSessionId() {
951     // Make sure that session id is always positive number.
952     if (++mLastSessionId <= 0) {
953         mLastSessionId = 1;
954     }
955     return mLastSessionId;
956 }
957 
updateVhalHeartBeat(int64_t value)958 void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
959     bool wrongHeartBeat;
960     {
961         Mutex::Autolock lock(mMutex);
962         if (!mIsEnabled) {
963             return;
964         }
965         wrongHeartBeat = value <= mVhalHeartBeat.value;
966         mVhalHeartBeat.eventTime = uptimeMillis();
967         mVhalHeartBeat.value = value;
968     }
969     if (wrongHeartBeat) {
970         ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
971         terminateVhal();
972         return;
973     }
974     std::chrono::nanoseconds intervalNs = mVhalHealthCheckWindowMs + kHealthCheckDelayMs;
975     mHandlerLooper->sendMessageDelayed(intervalNs.count(), mMessageHandler,
976                                        Message(MSG_VHAL_HEALTH_CHECK));
977 }
978 
checkVhalHealth()979 void WatchdogProcessService::checkVhalHealth() {
980     int64_t lastEventTime;
981     int64_t currentUptime = uptimeMillis();
982     {
983         Mutex::Autolock lock(mMutex);
984         if (mVhalService == nullptr || !mIsEnabled) {
985             return;
986         }
987         lastEventTime = mVhalHeartBeat.eventTime;
988     }
989     if (currentUptime > lastEventTime + mVhalHealthCheckWindowMs.count()) {
990         ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
991         terminateVhal();
992     }
993 }
994 
terminateVhal()995 void WatchdogProcessService::terminateVhal() {
996     auto maybeDumpAndKillVhalProcess = [&]() -> bool {
997         std::optional<ProcessIdentifier> processIdentifier;
998         {
999             Mutex::Autolock lock(mMutex);
1000             processIdentifier = mVhalProcessIdentifier;
1001         }
1002         if (!processIdentifier.has_value()) {
1003             return false;
1004         }
1005         dumpAndKillAllProcesses(std::vector<ProcessIdentifier>(1, *processIdentifier),
1006                                 /*reportToVhal=*/false);
1007         return true;
1008     };
1009     if (maybeDumpAndKillVhalProcess()) {
1010         return;
1011     }
1012     if (!cacheVhalProcessIdentifier() || !maybeDumpAndKillVhalProcess()) {
1013         ALOGE("Failed to termitate VHAL: failed to fetch VHAL PID");
1014     }
1015 }
1016 
getTimeoutDurationNs(const TimeoutLength & timeout)1017 std::chrono::nanoseconds WatchdogProcessService::getTimeoutDurationNs(
1018         const TimeoutLength& timeout) {
1019     // When a default timeout has been overridden by the |kPropertyClientCheckInterval| read-only
1020     // property override the timeout value for all timeout lengths.
1021     if (mOverriddenClientHealthCheckWindowNs.has_value()) {
1022         return mOverriddenClientHealthCheckWindowNs.value();
1023     }
1024     switch (timeout) {
1025         case TimeoutLength::TIMEOUT_CRITICAL:
1026             return 3s;  // 3s and no buffer time.
1027         case TimeoutLength::TIMEOUT_MODERATE:
1028             return 6s;  // 5s + 1s as buffer time.
1029         case TimeoutLength::TIMEOUT_NORMAL:
1030             return 12s;  // 10s + 2s as buffer time.
1031     }
1032 }
1033 
toString() const1034 std::string WatchdogProcessService::ClientInfo::toString() const {
1035     std::string buffer;
1036     StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
1037                   type == ClientType::Regular ? "regular" : "watchdog service");
1038     return buffer;
1039 }
1040 
getBinder() const1041 sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
1042     if (type == ClientType::Regular) {
1043         return BnCarWatchdogClient::asBinder(client);
1044     }
1045     return watchdogServiceBinder;
1046 }
1047 
linkToDeath(const sp<IBinder::DeathRecipient> & recipient) const1048 status_t WatchdogProcessService::ClientInfo::linkToDeath(
1049         const sp<IBinder::DeathRecipient>& recipient) const {
1050     if (type == ClientType::Regular) {
1051         return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
1052     }
1053     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
1054     // skip this step.
1055     return OK;
1056 }
1057 
unlinkToDeath(const wp<IBinder::DeathRecipient> & recipient) const1058 status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
1059         const wp<IBinder::DeathRecipient>& recipient) const {
1060     if (type == ClientType::Regular) {
1061         return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
1062     }
1063     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
1064     // skip this step.
1065     return OK;
1066 }
1067 
checkIfAlive(TimeoutLength timeout) const1068 Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
1069     if (type == ClientType::Regular) {
1070         return client->checkIfAlive(sessionId, timeout);
1071     }
1072     return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
1073 }
1074 
prepareProcessTermination() const1075 Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
1076     if (type == ClientType::Regular) {
1077         return client->prepareProcessTermination();
1078     }
1079     return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
1080 }
1081 
BinderDeathRecipient(const sp<WatchdogProcessService> & service)1082 WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
1083         const sp<WatchdogProcessService>& service) :
1084       mService(service) {}
1085 
binderDied(const wp<IBinder> & who)1086 void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
1087     mService->handleBinderDeath(who);
1088 }
1089 
PropertyChangeListener(const sp<WatchdogProcessService> & service)1090 WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
1091         const sp<WatchdogProcessService>& service) :
1092       mService(service) {}
1093 
onPropertyEvent(const std::vector<std::unique_ptr<IHalPropValue>> & propValues)1094 void WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
1095         const std::vector<std::unique_ptr<IHalPropValue>>& propValues) {
1096     for (const auto& value : propValues) {
1097         if (value->getPropId() == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
1098             if (value->getInt64Values().size() < 1) {
1099                 ALOGE("Invalid VHAL_HEARTBEAT value, empty value");
1100             } else {
1101                 mService->updateVhalHeartBeat(value->getInt64Values()[0]);
1102             }
1103             break;
1104         }
1105     }
1106 }
1107 
onPropertySetError(const std::vector<HalPropError> & errors)1108 void WatchdogProcessService::PropertyChangeListener::onPropertySetError(
1109         const std::vector<HalPropError>& errors) {
1110     for (const auto& error : errors) {
1111         if (error.propId != static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE) &&
1112             error.propId != static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS)) {
1113             continue;
1114         }
1115         ALOGE("failed to set VHAL property, prop ID: %d, status: %d", error.propId,
1116               static_cast<int32_t>(error.status));
1117     }
1118 }
1119 
MessageHandlerImpl(const sp<WatchdogProcessService> & service)1120 WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
1121         const sp<WatchdogProcessService>& service) :
1122       mService(service) {}
1123 
handleMessage(const Message & message)1124 void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
1125     switch (message.what) {
1126         case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
1127         case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
1128         case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
1129             mService->doHealthCheck(message.what);
1130             break;
1131         case MSG_VHAL_WATCHDOG_ALIVE:
1132             mService->reportWatchdogAliveToVhal();
1133             break;
1134         case MSG_VHAL_HEALTH_CHECK:
1135             mService->checkVhalHealth();
1136             break;
1137         case MSG_CACHE_VHAL_PROCESS_IDENTIFIER:
1138             mService->cacheVhalProcessIdentifier();
1139             break;
1140         default:
1141             ALOGW("Unknown message: %d", message.what);
1142     }
1143 }
1144 
1145 }  // namespace watchdog
1146 }  // namespace automotive
1147 }  // namespace android
1148