• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogProcessService.h"
21 
22 #include "WatchdogServiceHelper.h"
23 
24 #include <android-base/chrono_utils.h>
25 #include <android-base/file.h>
26 #include <android-base/macros.h>
27 #include <android-base/properties.h>
28 #include <android-base/stringprintf.h>
29 #include <android-base/strings.h>
30 #include <android/automotive/watchdog/BnCarWatchdogClient.h>
31 #include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
32 #include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
33 #include <android/hardware/automotive/vehicle/2.0/types.h>
34 #include <android/hidl/manager/1.0/IServiceManager.h>
35 #include <binder/IPCThreadState.h>
36 #include <hidl/HidlTransportSupport.h>
37 #include <utils/SystemClock.h>
38 
39 #include <utility>
40 
41 namespace android {
42 namespace automotive {
43 namespace watchdog {
44 
45 namespace aawi = ::android::automotive::watchdog::internal;
46 
47 using aawi::BnCarWatchdogServiceForSystem;
48 using aawi::ICarWatchdogServiceForSystem;
49 using ::android::IBinder;
50 using ::android::sp;
51 using ::android::String16;
52 using ::android::base::Error;
53 using ::android::base::GetProperty;
54 using ::android::base::ReadFileToString;
55 using ::android::base::Result;
56 using ::android::base::StringAppendF;
57 using ::android::base::StringPrintf;
58 using ::android::base::Trim;
59 using ::android::base::WriteStringToFd;
60 using ::android::binder::Status;
61 using ::android::hardware::hidl_vec;
62 using ::android::hardware::interfacesEqual;
63 using ::android::hardware::Return;
64 using ::android::hardware::automotive::vehicle::V2_0::IVehicle;
65 using ::android::hardware::automotive::vehicle::V2_0::ProcessTerminationReason;
66 using ::android::hardware::automotive::vehicle::V2_0::StatusCode;
67 using ::android::hardware::automotive::vehicle::V2_0::SubscribeFlags;
68 using ::android::hardware::automotive::vehicle::V2_0::SubscribeOptions;
69 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropConfig;
70 using ::android::hardware::automotive::vehicle::V2_0::VehicleProperty;
71 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropertyStatus;
72 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropValue;
73 using ::android::hidl::base::V1_0::IBase;
74 
75 namespace {
76 
77 const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
78                                               TimeoutLength::TIMEOUT_MODERATE,
79                                               TimeoutLength::TIMEOUT_NORMAL};
80 
81 // TimeoutLength is also used as a message ID. Other message IDs should start next to
82 // TimeoutLength::TIMEOUT_NORMAL.
83 const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
84 const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
85 
86 // VHAL sends heart beat every 3s. Car watchdog checks if there is the latest heart beat from VHAL
87 // with 1s marginal time.
88 constexpr std::chrono::nanoseconds kVhalHealthCheckDelayNs = 4s;
89 constexpr int64_t kVhalHeartBeatIntervalMs = 3000;
90 
91 constexpr const char kServiceName[] = "WatchdogProcessService";
92 constexpr const char kVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
93 
timeoutToDurationNs(const TimeoutLength & timeout)94 std::chrono::nanoseconds timeoutToDurationNs(const TimeoutLength& timeout) {
95     switch (timeout) {
96         case TimeoutLength::TIMEOUT_CRITICAL:
97             return 3s;  // 3s and no buffer time.
98         case TimeoutLength::TIMEOUT_MODERATE:
99             return 6s;  // 5s + 1s as buffer time.
100         case TimeoutLength::TIMEOUT_NORMAL:
101             return 12s;  // 10s + 2s as buffer time.
102     }
103 }
104 
// Joins the given process IDs into one string, separating them with ", ".
//
// For example, {1, 2, 3} becomes "1, 2, 3". An empty vector yields an empty string.
std::string pidArrayToString(const std::vector<int32_t>& pids) {
    std::string buffer;
    // The index is a size_t so that it is compared against pids.size() without mixing signed and
    // unsigned types.
    for (size_t i = 0; i < pids.size(); ++i) {
        if (i != 0) {
            buffer += ", ";
        }
        buffer += std::to_string(pids[i]);
    }
    return buffer;
}
118 
isSystemShuttingDown()119 bool isSystemShuttingDown() {
120     std::string sysPowerCtl;
121     std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
122     std::getline(tokenStream, sysPowerCtl, ',');
123     return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
124 }
125 
126 }  // namespace
127 
WatchdogProcessService(const sp<Looper> & handlerLooper)128 WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
129       mHandlerLooper(handlerLooper),
130       mIsEnabled(true),
131       mLastSessionId(0),
132       mServiceStarted(false),
133       mVhalService(nullptr) {
134     mMessageHandler = sp<MessageHandlerImpl>::make(this);
135     mBinderDeathRecipient = sp<BinderDeathRecipient>::make(this);
136     mHidlDeathRecipient = sp<HidlDeathRecipient>::make(this);
137     mPropertyChangeListener = sp<PropertyChangeListener>::make(this);
138     for (const auto& timeout : kTimeouts) {
139         mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
140         mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
141     }
142 }
registerWatchdogServiceHelper(const sp<IWatchdogServiceHelper> & helper)143 Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
144         const sp<IWatchdogServiceHelper>& helper) {
145     if (helper == nullptr) {
146         return Error() << "Must provide a non-null watchdog service helper instance";
147     }
148     Mutex::Autolock lock(mMutex);
149     mWatchdogServiceHelper = helper;
150     return {};
151 }
152 
registerClient(const sp<ICarWatchdogClient> & client,TimeoutLength timeout)153 Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
154                                               TimeoutLength timeout) {
155     pid_t callingPid = IPCThreadState::self()->getCallingPid();
156     uid_t callingUid = IPCThreadState::self()->getCallingUid();
157     ClientInfo clientInfo(client, callingPid, callingUid);
158 
159     Mutex::Autolock lock(mMutex);
160     return registerClientLocked(clientInfo, timeout);
161 }
162 
unregisterClient(const sp<ICarWatchdogClient> & client)163 Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
164     Mutex::Autolock lock(mMutex);
165     sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
166     // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
167     // MODERATE, NORMAL).
168     return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
169 }
170 
registerCarWatchdogService(const sp<IBinder> & binder)171 Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
172     pid_t callingPid = IPCThreadState::self()->getCallingPid();
173     uid_t callingUid = IPCThreadState::self()->getCallingUid();
174 
175     Mutex::Autolock lock(mMutex);
176     if (mWatchdogServiceHelper == nullptr) {
177         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
178                                          "Watchdog service helper instance is null");
179     }
180     ClientInfo clientInfo(mWatchdogServiceHelper, binder, callingPid, callingUid);
181     return registerClientLocked(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
182 }
183 
unregisterCarWatchdogService(const sp<IBinder> & binder)184 void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
185     Mutex::Autolock lock(mMutex);
186 
187     std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
188     unregisterClientLocked(timeouts, binder, ClientType::Service);
189 }
190 
registerMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)191 Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
192     Mutex::Autolock lock(mMutex);
193     sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
194     if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
195         return Status::ok();
196     }
197     status_t ret = binder->linkToDeath(mBinderDeathRecipient);
198     if (ret != OK) {
199         ALOGW("Failed to register the monitor as it is dead.");
200         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
201     }
202     mMonitor = monitor;
203     if (DEBUG) {
204         ALOGD("Car watchdog monitor is registered");
205     }
206     return Status::ok();
207 }
208 
unregisterMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)209 Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
210     Mutex::Autolock lock(mMutex);
211     sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
212     sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
213     if (curBinder != newBinder) {
214         ALOGW("Failed to unregister the monitor as it has not been registered.");
215         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
216                                          "The monitor has not been registered.");
217     }
218     curBinder->unlinkToDeath(mBinderDeathRecipient);
219     mMonitor = nullptr;
220     if (DEBUG) {
221         ALOGD("Car watchdog monitor is unregistered");
222     }
223     return Status::ok();
224 }
225 
tellClientAlive(const sp<ICarWatchdogClient> & client,int32_t sessionId)226 Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
227                                                int32_t sessionId) {
228     Mutex::Autolock lock(mMutex);
229     return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
230 }
231 
tellCarWatchdogServiceAlive(const sp<ICarWatchdogServiceForSystem> & service,const std::vector<int32_t> & clientsNotResponding,int32_t sessionId)232 Status WatchdogProcessService::tellCarWatchdogServiceAlive(
233         const sp<ICarWatchdogServiceForSystem>& service,
234         const std::vector<int32_t>& clientsNotResponding, int32_t sessionId) {
235     Status status;
236     {
237         Mutex::Autolock lock(mMutex);
238         if (DEBUG) {
239             std::string buffer;
240             int size = clientsNotResponding.size();
241             if (size != 0) {
242                 StringAppendF(&buffer, "%d", clientsNotResponding[0]);
243                 for (int i = 1; i < clientsNotResponding.size(); i++) {
244                     StringAppendF(&buffer, ", %d", clientsNotResponding[i]);
245                 }
246                 ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
247                       sessionId, buffer.c_str());
248             }
249         }
250         status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
251     }
252     if (status.isOk()) {
253         dumpAndKillAllProcesses(clientsNotResponding, true);
254     }
255     return status;
256 }
257 
tellDumpFinished(const sp<aawi::ICarWatchdogMonitor> & monitor,int32_t pid)258 Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
259                                                 int32_t pid) {
260     Mutex::Autolock lock(mMutex);
261     if (mMonitor == nullptr || monitor == nullptr ||
262         aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
263                 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
264         return Status::
265                 fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
266                                   "The monitor is not registered or an invalid monitor is given");
267     }
268     ALOGI("Process(pid: %d) has been dumped and killed", pid);
269     return Status::ok();
270 }
271 
setEnabled(bool isEnabled)272 void WatchdogProcessService::setEnabled(bool isEnabled) {
273     Mutex::Autolock lock(mMutex);
274     if (mIsEnabled != isEnabled) {
275         ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
276     }
277     mIsEnabled = isEnabled;
278     if (mIsEnabled) {
279         for (const auto& timeout : kTimeouts) {
280             startHealthCheckingLocked(timeout);
281         }
282     }
283 }
284 
notifyUserStateChange(userid_t userId,aawi::UserState state)285 Status WatchdogProcessService::notifyUserStateChange(userid_t userId, aawi::UserState state) {
286     std::string buffer;
287     Mutex::Autolock lock(mMutex);
288     switch (state) {
289         case aawi::UserState::USER_STATE_STARTED:
290             mStoppedUserIds.erase(userId);
291             buffer = StringPrintf("user(%d) is started", userId);
292             break;
293         case aawi::UserState::USER_STATE_STOPPED:
294             mStoppedUserIds.insert(userId);
295             buffer = StringPrintf("user(%d) is stopped", userId);
296             break;
297         default:
298             ALOGW("Unsupported user state: %d", state);
299             return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "Unsupported user state");
300     }
301     ALOGI("Received user state change: %s", buffer.c_str());
302     return Status::ok();
303 }
304 
dump(int fd,const Vector<String16> &)305 Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
306     Mutex::Autolock lock(mMutex);
307     const char* indent = "  ";
308     const char* doubleIndent = "    ";
309     std::string buffer;
310     WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
311     WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
312                                  mIsEnabled ? "true" : "false"),
313                     fd);
314     WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
315     int count = 1;
316     for (const auto& timeout : kTimeouts) {
317         std::vector<ClientInfo>& clients = mClients[timeout];
318         for (auto it = clients.begin(); it != clients.end(); it++, count++) {
319             WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
320                                          it->toString().c_str()),
321                             fd);
322         }
323     }
324     WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
325                                  mMonitor == nullptr ? "false" : "true"),
326                     fd);
327     WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
328                                  isSystemShuttingDown() ? "true" : "false"),
329                     fd);
330     buffer = "none";
331     bool first = true;
332     for (const auto& userId : mStoppedUserIds) {
333         if (first) {
334             buffer = StringPrintf("%d", userId);
335             first = false;
336         } else {
337             StringAppendF(&buffer, ", %d", userId);
338         }
339     }
340     WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
341     return {};
342 }
343 
// Runs one health check round for the timeout level encoded in `what`.
//
// The round first handles clients that didn't answer the previous ping, then pings the currently
// registered clients and schedules the next round. Nothing is done while the service is
// disabled.
void WatchdogProcessService::doHealthCheck(int what) {
    mHandlerLooper->removeMessages(mMessageHandler, what);
    {
        Mutex::Autolock lock(mMutex);
        if (!mIsEnabled) {
            return;
        }
    }
    const auto timeout = static_cast<TimeoutLength>(what);
    dumpAndKillClientsIfNotResponding(timeout);

    // The registered clients are copied into a local vector so that they can be pinged without
    // holding the mutex. As a consequence, a client may still receive a ping after it has been
    // unregistered. Clients should be able to handle that.
    std::vector<ClientInfo> clientsToCheck;
    PingedClientMap& pingedClients = mPingedClients[timeout];
    {
        Mutex::Autolock lock(mMutex);
        pingedClients.clear();
        clientsToCheck = mClients[timeout];
        for (ClientInfo& candidate : clientsToCheck) {
            // Clients of stopped users get no new session and are not tracked as pinged.
            if (mStoppedUserIds.count(candidate.userId) != 0) {
                continue;
            }
            candidate.sessionId = getNewSessionId();
            pingedClients.emplace(candidate.sessionId, candidate);
        }
    }

    for (const ClientInfo& candidate : clientsToCheck) {
        const Status pingStatus = candidate.checkIfAlive(timeout);
        if (pingStatus.isOk()) {
            continue;
        }
        ALOGW("Sending a ping message to client(pid: %d) failed: %s", candidate.pid,
              pingStatus.exceptionMessage().c_str());
        // A client that couldn't be pinged is no longer expected to respond.
        Mutex::Autolock lock(mMutex);
        pingedClients.erase(candidate.sessionId);
    }

    // Though the size of pingedClients is a more specific measure, clientsToCheck is used as a
    // conservative approach.
    if (!clientsToCheck.empty()) {
        mHandlerLooper->sendMessageDelayed(timeoutToDurationNs(timeout).count(), mMessageHandler,
                                           Message(what));
    }
}
390 
start()391 Result<void> WatchdogProcessService::start() {
392     if (mServiceStarted) {
393         return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
394     }
395     mServiceStarted = true;
396     reportWatchdogAliveToVhal();
397     return {};
398 }
399 
// Stops the service: drops every registered client, releases the watchdog service helper and
// stops tracking the death of the clients, the monitor and VHAL.
void WatchdogProcessService::terminate() {
    Mutex::Autolock lock(mMutex);
    for (const auto& timeout : kTimeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        for (const ClientInfo& client : clients) {
            client.unlinkToDeath(mBinderDeathRecipient);
        }
        // Removing everything at once avoids shifting the remaining elements after each
        // individual erase from the front of the vector.
        clients.clear();
    }
    mWatchdogServiceHelper.clear();
    if (mMonitor != nullptr) {
        sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
        binder->unlinkToDeath(mBinderDeathRecipient);
    }
    if (mVhalService != nullptr) {
        mVhalService->unlinkToDeath(mHidlDeathRecipient);
    }
    mServiceStarted = false;
}
419 
registerClientLocked(const ClientInfo & clientInfo,TimeoutLength timeout)420 Status WatchdogProcessService::registerClientLocked(const ClientInfo& clientInfo,
421                                                     TimeoutLength timeout) {
422     if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
423         ALOGW("Failed to register (%s) as it is already registered.",
424               clientInfo.toString().c_str());
425         return Status::ok();
426     }
427     status_t status = clientInfo.linkToDeath(mBinderDeathRecipient);
428     if (status != OK) {
429         ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
430         std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
431         return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
432     }
433     std::vector<ClientInfo>& clients = mClients[timeout];
434     clients.emplace_back(clientInfo);
435 
436     // If the client array becomes non-empty, start health checking.
437     if (clients.size() == 1) {
438         startHealthCheckingLocked(timeout);
439     }
440     if (DEBUG) {
441         ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
442               timeout);
443     }
444     return Status::ok();
445 }
446 
unregisterClientLocked(const std::vector<TimeoutLength> & timeouts,sp<IBinder> binder,ClientType clientType)447 Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
448                                                       sp<IBinder> binder, ClientType clientType) {
449     const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
450     bool result = findClientAndProcessLocked(timeouts, binder,
451                                              [&](std::vector<ClientInfo>& clients,
452                                                  std::vector<ClientInfo>::const_iterator it) {
453                                                  it->unlinkToDeath(mBinderDeathRecipient);
454                                                  clients.erase(it);
455                                              });
456     if (!result) {
457         std::string errorStr = StringPrintf("The %s has not been registered", clientName);
458         const char* errorCause = errorStr.c_str();
459         ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
460         return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
461     }
462     if (DEBUG) {
463         ALOGD("Car watchdog %s is unregistered", clientName);
464     }
465     return Status::ok();
466 }
467 
tellClientAliveLocked(const sp<IBinder> & binder,int32_t sessionId)468 Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
469     for (const auto& timeout : kTimeouts) {
470         PingedClientMap& clients = mPingedClients[timeout];
471         PingedClientMap::const_iterator it = clients.find(sessionId);
472         if (it == clients.cend() || !it->second.matchesBinder(binder)) {
473             continue;
474         }
475         clients.erase(it);
476         return Status::ok();
477     }
478     return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
479                                      "The client is not registered or the session ID is not found");
480 }
481 
findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,const ClientInfo & clientInfo,const Processor & processor)482 bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
483                                                         const ClientInfo& clientInfo,
484                                                         const Processor& processor) {
485     for (const auto& timeout : timeouts) {
486         std::vector<ClientInfo>& clients = mClients[timeout];
487         for (auto it = clients.begin(); it != clients.end(); it++) {
488             if (std::as_const(*it) != clientInfo) {
489                 continue;
490             }
491             if (processor != nullptr) {
492                 processor(clients, it);
493             }
494             return true;
495         }
496     }
497     return false;
498 }
499 
findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,const sp<IBinder> binder,const Processor & processor)500 bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
501                                                         const sp<IBinder> binder,
502                                                         const Processor& processor) {
503     for (const auto& timeout : timeouts) {
504         std::vector<ClientInfo>& clients = mClients[timeout];
505         for (auto it = clients.begin(); it != clients.end(); it++) {
506             if (!it->matchesBinder(binder)) {
507                 continue;
508             }
509             if (processor != nullptr) {
510                 processor(clients, it);
511             }
512             return true;
513         }
514     }
515     return false;
516 }
517 
startHealthCheckingLocked(TimeoutLength timeout)518 Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
519     PingedClientMap& clients = mPingedClients[timeout];
520     clients.clear();
521     int what = static_cast<int>(timeout);
522     auto durationNs = timeoutToDurationNs(timeout);
523     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
524     return {};
525 }
526 
dumpAndKillClientsIfNotResponding(TimeoutLength timeout)527 Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
528     std::vector<int32_t> processIds;
529     std::vector<const ClientInfo*> clientsToNotify;
530     {
531         Mutex::Autolock lock(mMutex);
532         PingedClientMap& clients = mPingedClients[timeout];
533         for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
534             pid_t pid = -1;
535             userid_t userId = -1;
536             std::vector<TimeoutLength> timeouts = {timeout};
537             findClientAndProcessLocked(timeouts, it->second,
538                                        [&](std::vector<ClientInfo>& cachedClients,
539                                            std::vector<ClientInfo>::const_iterator
540                                                    cachedClientsIt) {
541                                            pid = cachedClientsIt->pid;
542                                            userId = cachedClientsIt->userId;
543                                            cachedClients.erase(cachedClientsIt);
544                                        });
545             if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
546                 clientsToNotify.emplace_back(&it->second);
547                 processIds.push_back(pid);
548             }
549         }
550     }
551     for (const ClientInfo*& clientInfo : clientsToNotify) {
552         clientInfo->prepareProcessTermination();
553     }
554     return dumpAndKillAllProcesses(processIds, true);
555 }
556 
dumpAndKillAllProcesses(const std::vector<int32_t> & processesNotResponding,bool reportToVhal)557 Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
558         const std::vector<int32_t>& processesNotResponding, bool reportToVhal) {
559     size_t size = processesNotResponding.size();
560     if (size == 0) {
561         return {};
562     }
563     std::string pidString = pidArrayToString(processesNotResponding);
564     sp<aawi::ICarWatchdogMonitor> monitor;
565     {
566         Mutex::Autolock lock(mMutex);
567         if (mMonitor == nullptr) {
568             std::string errorMsg =
569                     StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
570                                  pidString.c_str());
571             ALOGW("%s", errorMsg.c_str());
572             return Error() << errorMsg;
573         }
574         monitor = mMonitor;
575     }
576     if (isSystemShuttingDown()) {
577         ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
578               pidString.c_str());
579         return {};
580     }
581     if (reportToVhal) {
582         reportTerminatedProcessToVhal(processesNotResponding);
583     }
584     monitor->onClientsNotResponding(processesNotResponding);
585     if (DEBUG) {
586         ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
587     }
588     return {};
589 }
590 
591 // Handle when car watchdog clients die.
handleBinderDeath(const wp<IBinder> & who)592 void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
593     Mutex::Autolock lock(mMutex);
594     IBinder* binder = who.unsafe_get();
595     // Check if dead binder is monitor.
596     sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
597     if (monitor == binder) {
598         mMonitor = nullptr;
599         ALOGW("The monitor has died.");
600         return;
601     }
602     findClientAndProcessLocked(kTimeouts, binder,
603                                [&](std::vector<ClientInfo>& clients,
604                                    std::vector<ClientInfo>::const_iterator it) {
605                                    ALOGW("Client(pid: %d) died", it->pid);
606                                    clients.erase(it);
607                                });
608 }
609 
610 // Handle when VHAL dies.
handleHidlDeath(const wp<IBase> & who)611 void WatchdogProcessService::handleHidlDeath(const wp<IBase>& who) {
612     Mutex::Autolock lock(mMutex);
613     if (!interfacesEqual(mVhalService, who.promote())) {
614         return;
615     }
616     ALOGW("VHAL has died.");
617     mVhalService->unlinkToDeath(mHidlDeathRecipient);
618     mVhalService = nullptr;
619 }
620 
reportWatchdogAliveToVhal()621 void WatchdogProcessService::reportWatchdogAliveToVhal() {
622     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
623         ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
624         return;
625     }
626     int64_t systemUptime = uptimeMillis();
627     VehiclePropValue propValue{
628             .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
629             .status = VehiclePropertyStatus::AVAILABLE,
630             .value = {.int64Values = {systemUptime}},
631     };
632     const auto& ret = updateVhal(propValue);
633     if (!ret.ok()) {
634         ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s");
635     }
636     // Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
637     auto durationNs = timeoutToDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
638     mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
639     mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
640                                        Message(MSG_VHAL_WATCHDOG_ALIVE));
641 }
642 
reportTerminatedProcessToVhal(const std::vector<int32_t> & processesNotResponding)643 void WatchdogProcessService::reportTerminatedProcessToVhal(
644         const std::vector<int32_t>& processesNotResponding) {
645     if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
646         ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
647               "reported to VHAL.");
648         return;
649     }
650     for (auto&& pid : processesNotResponding) {
651         const auto& retCmdLine = readProcCmdLine(pid);
652         if (!retCmdLine.ok()) {
653             ALOGW("Failed to get process command line for pid(%d): %s", pid,
654                   retCmdLine.error().message().c_str());
655             continue;
656         }
657         std::string procCmdLine = retCmdLine.value();
658         VehiclePropValue propValue{
659                 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
660                 .status = VehiclePropertyStatus::AVAILABLE,
661                 .value = {
662                          .int32Values = {static_cast<int32_t>(
663                                  ProcessTerminationReason::NOT_RESPONDING)},
664                          .stringValue = procCmdLine,
665                 },
666         };
667         const auto& retUpdate = updateVhal(propValue);
668         if (!retUpdate.ok()) {
669             ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
670                   procCmdLine.c_str());
671         }
672     }
673 }
674 
updateVhal(const VehiclePropValue & value)675 Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
676     Mutex::Autolock lock(mMutex);
677     const auto& connectRet = connectToVhalLocked();
678     if (!connectRet.ok()) {
679         std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
680         ALOGW("%s", errorMsg.c_str());
681         return Error() << errorMsg;
682     }
683     if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(value.prop)) > 0) {
684         std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", value.prop);
685         ALOGW("%s", errorMsg.c_str());
686         return Error() << errorMsg;
687     }
688     const auto& updateRet = mVhalService->set(value);
689     if (updateRet.isOk() && updateRet == StatusCode::OK) {
690         return {};
691     }
692     return Error() << "Failed to set propValue(" << value.prop << ") to VHAL";
693 }
694 
readProcCmdLine(int32_t pid)695 Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
696     std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
697     std::string procCmdLine;
698     if (ReadFileToString(cmdLinePath, &procCmdLine)) {
699         std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
700         procCmdLine = Trim(procCmdLine);
701         return procCmdLine;
702     }
703     return Error() << "Failed to read " << cmdLinePath;
704 }
705 
connectToVhalLocked()706 Result<void> WatchdogProcessService::connectToVhalLocked() {
707     if (mVhalService.get() != nullptr) {
708         return {};
709     }
710     mVhalService = IVehicle::tryGetService();
711     if (mVhalService.get() == nullptr) {
712         return Error() << "Failed to connect to VHAL.";
713     }
714     mVhalService->linkToDeath(mHidlDeathRecipient, /*cookie=*/0);
715     queryVhalPropertiesLocked();
716     subscribeToVhalHeartBeatLocked();
717     ALOGI("Successfully connected to VHAL.");
718     return {};
719 }
720 
queryVhalPropertiesLocked()721 void WatchdogProcessService::queryVhalPropertiesLocked() {
722     mNotSupportedVhalProperties.clear();
723     std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
724                                             VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
725                                             VehicleProperty::VHAL_HEARTBEAT};
726     for (const auto& propId : propIds) {
727         if (!isVhalPropertySupportedLocked(propId)) {
728             mNotSupportedVhalProperties.insert(propId);
729         }
730     }
731 }
732 
isVhalPropertySupportedLocked(VehicleProperty propId)733 bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
734     StatusCode status;
735     hidl_vec<int32_t> props = {static_cast<int32_t>(propId)};
736     mVhalService->getPropConfigs(props,
737                                  [&status](StatusCode s,
738                                            hidl_vec<VehiclePropConfig> /*propConfigs*/) {
739                                      status = s;
740                                  });
741     return status == StatusCode::OK;
742 }
743 
subscribeToVhalHeartBeatLocked()744 void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
745     if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
746         ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
747         return;
748     }
749 
750     mVhalHeartBeat = {
751             .eventTime = 0,
752             .value = 0,
753     };
754 
755     SubscribeOptions reqVhalProperties[] = {
756             {.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT),
757              .flags = SubscribeFlags::EVENTS_FROM_CAR},
758     };
759     hidl_vec<SubscribeOptions> options;
760     options.setToExternal(reqVhalProperties, arraysize(reqVhalProperties));
761     StatusCode status = mVhalService->subscribe(mPropertyChangeListener, options);
762     if (status != StatusCode::OK) {
763         ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled.");
764         return;
765     }
766     mHandlerLooper->sendMessageDelayed(kVhalHealthCheckDelayNs.count(), mMessageHandler,
767                                        Message(MSG_VHAL_HEALTH_CHECK));
768 }
769 
getNewSessionId()770 int32_t WatchdogProcessService::getNewSessionId() {
771     // Make sure that session id is always positive number.
772     if (++mLastSessionId <= 0) {
773         mLastSessionId = 1;
774     }
775     return mLastSessionId;
776 }
777 
updateVhalHeartBeat(int64_t value)778 void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
779     bool wrongHeartBeat;
780     {
781         Mutex::Autolock lock(mMutex);
782         wrongHeartBeat = value <= mVhalHeartBeat.value;
783         mVhalHeartBeat.eventTime = uptimeMillis();
784         mVhalHeartBeat.value = value;
785     }
786     if (wrongHeartBeat) {
787         ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
788         terminateVhal();
789         return;
790     }
791     mHandlerLooper->sendMessageDelayed(kVhalHealthCheckDelayNs.count(), mMessageHandler,
792                                        Message(MSG_VHAL_HEALTH_CHECK));
793 }
794 
checkVhalHealth()795 void WatchdogProcessService::checkVhalHealth() {
796     int64_t lastEventTime;
797     int64_t currentUptime = uptimeMillis();
798     {
799         Mutex::Autolock lock(mMutex);
800         lastEventTime = mVhalHeartBeat.eventTime;
801     }
802     if (currentUptime > lastEventTime + kVhalHeartBeatIntervalMs) {
803         ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
804         terminateVhal();
805     }
806 }
807 
terminateVhal()808 void WatchdogProcessService::terminateVhal() {
809     using ::android::hidl::manager::V1_0::IServiceManager;
810 
811     std::vector<int32_t> processIds;
812     sp<IServiceManager> manager = IServiceManager::getService();
813     Return<void> ret = manager->debugDump([&](auto& hals) {
814         for (const auto& info : hals) {
815             if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
816                 continue;
817             }
818             if (info.interfaceName == kVhalInterfaceName) {
819                 processIds.push_back(info.pid);
820                 break;
821             }
822         }
823     });
824 
825     if (!ret.isOk()) {
826         ALOGE("Failed to terminate VHAL: could not get VHAL process id");
827         return;
828     } else if (processIds.empty()) {
829         ALOGE("Failed to terminate VHAL: VHAL is not running");
830         return;
831     }
832     dumpAndKillAllProcesses(processIds, false);
833 }
834 
toString() const835 std::string WatchdogProcessService::ClientInfo::toString() const {
836     std::string buffer;
837     StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
838                   type == ClientType::Regular ? "regular" : "watchdog service");
839     return buffer;
840 }
841 
getBinder() const842 sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
843     if (type == ClientType::Regular) {
844         return BnCarWatchdogClient::asBinder(client);
845     }
846     return watchdogServiceBinder;
847 }
848 
linkToDeath(const sp<IBinder::DeathRecipient> & recipient) const849 status_t WatchdogProcessService::ClientInfo::linkToDeath(
850         const sp<IBinder::DeathRecipient>& recipient) const {
851     if (type == ClientType::Regular) {
852         return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
853     }
854     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
855     // skip this step.
856     return OK;
857 }
858 
unlinkToDeath(const wp<IBinder::DeathRecipient> & recipient) const859 status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
860         const wp<IBinder::DeathRecipient>& recipient) const {
861     if (type == ClientType::Regular) {
862         return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
863     }
864     // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
865     // skip this step.
866     return OK;
867 }
868 
checkIfAlive(TimeoutLength timeout) const869 Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
870     if (type == ClientType::Regular) {
871         return client->checkIfAlive(sessionId, timeout);
872     }
873     return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
874 }
875 
prepareProcessTermination() const876 Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
877     if (type == ClientType::Regular) {
878         return client->prepareProcessTermination();
879     }
880     return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
881 }
882 
BinderDeathRecipient(const sp<WatchdogProcessService> & service)883 WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
884         const sp<WatchdogProcessService>& service) :
885       mService(service) {}
886 
binderDied(const wp<IBinder> & who)887 void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
888     mService->handleBinderDeath(who);
889 }
890 
HidlDeathRecipient(const sp<WatchdogProcessService> & service)891 WatchdogProcessService::HidlDeathRecipient::HidlDeathRecipient(
892         const sp<WatchdogProcessService>& service) :
893       mService(service) {}
894 
serviceDied(uint64_t,const wp<IBase> & who)895 void WatchdogProcessService::HidlDeathRecipient::serviceDied(uint64_t /*cookie*/,
896                                                              const wp<IBase>& who) {
897     mService->handleHidlDeath(who);
898 }
899 
PropertyChangeListener(const sp<WatchdogProcessService> & service)900 WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
901         const sp<WatchdogProcessService>& service) :
902       mService(service) {}
903 
onPropertyEvent(const hidl_vec<VehiclePropValue> & propValues)904 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
905         const hidl_vec<VehiclePropValue>& propValues) {
906     for (const auto& value : propValues) {
907         if (value.prop == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
908             mService->updateVhalHeartBeat(value.value.int64Values[0]);
909             break;
910         }
911     }
912     return Return<void>();
913 }
914 
onPropertySet(const VehiclePropValue &)915 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySet(
916         const VehiclePropValue& /*propValue*/) {
917     return Return<void>();
918 }
919 
onPropertySetError(StatusCode,int32_t,int32_t)920 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySetError(
921         StatusCode /*status*/, int32_t /*propId*/, int32_t /*areaId*/) {
922     return Return<void>();
923 }
924 
MessageHandlerImpl(const sp<WatchdogProcessService> & service)925 WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
926         const sp<WatchdogProcessService>& service) :
927       mService(service) {}
928 
handleMessage(const Message & message)929 void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
930     switch (message.what) {
931         case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
932         case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
933         case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
934             mService->doHealthCheck(message.what);
935             break;
936         case MSG_VHAL_WATCHDOG_ALIVE:
937             mService->reportWatchdogAliveToVhal();
938             break;
939         case MSG_VHAL_HEALTH_CHECK:
940             mService->checkVhalHealth();
941             break;
942         default:
943             ALOGW("Unknown message: %d", message.what);
944     }
945 }
946 
947 }  // namespace watchdog
948 }  // namespace automotive
949 }  // namespace android
950