1 /**
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "WatchdogProcessService.h"
21
22 #include "WatchdogServiceHelper.h"
23
24 #include <android-base/chrono_utils.h>
25 #include <android-base/file.h>
26 #include <android-base/macros.h>
27 #include <android-base/properties.h>
28 #include <android-base/stringprintf.h>
29 #include <android-base/strings.h>
30 #include <android/automotive/watchdog/BnCarWatchdogClient.h>
31 #include <android/automotive/watchdog/internal/BnCarWatchdogMonitor.h>
32 #include <android/automotive/watchdog/internal/BnCarWatchdogServiceForSystem.h>
33 #include <android/hardware/automotive/vehicle/2.0/types.h>
34 #include <android/hidl/manager/1.0/IServiceManager.h>
35 #include <binder/IPCThreadState.h>
36 #include <hidl/HidlTransportSupport.h>
37 #include <utils/SystemClock.h>
38
39 #include <utility>
40
41 namespace android {
42 namespace automotive {
43 namespace watchdog {
44
45 namespace aawi = ::android::automotive::watchdog::internal;
46
47 using aawi::BnCarWatchdogServiceForSystem;
48 using aawi::ICarWatchdogServiceForSystem;
49 using ::android::IBinder;
50 using ::android::sp;
51 using ::android::String16;
52 using ::android::base::Error;
53 using ::android::base::GetProperty;
54 using ::android::base::ReadFileToString;
55 using ::android::base::Result;
56 using ::android::base::StringAppendF;
57 using ::android::base::StringPrintf;
58 using ::android::base::Trim;
59 using ::android::base::WriteStringToFd;
60 using ::android::binder::Status;
61 using ::android::hardware::hidl_vec;
62 using ::android::hardware::interfacesEqual;
63 using ::android::hardware::Return;
64 using ::android::hardware::automotive::vehicle::V2_0::IVehicle;
65 using ::android::hardware::automotive::vehicle::V2_0::ProcessTerminationReason;
66 using ::android::hardware::automotive::vehicle::V2_0::StatusCode;
67 using ::android::hardware::automotive::vehicle::V2_0::SubscribeFlags;
68 using ::android::hardware::automotive::vehicle::V2_0::SubscribeOptions;
69 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropConfig;
70 using ::android::hardware::automotive::vehicle::V2_0::VehicleProperty;
71 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropertyStatus;
72 using ::android::hardware::automotive::vehicle::V2_0::VehiclePropValue;
73 using ::android::hidl::base::V1_0::IBase;
74
75 namespace {
76
77 const std::vector<TimeoutLength> kTimeouts = {TimeoutLength::TIMEOUT_CRITICAL,
78 TimeoutLength::TIMEOUT_MODERATE,
79 TimeoutLength::TIMEOUT_NORMAL};
80
81 // TimeoutLength is also used as a message ID. Other message IDs should start next to
82 // TimeoutLength::TIMEOUT_NORMAL.
83 const int32_t MSG_VHAL_WATCHDOG_ALIVE = static_cast<int>(TimeoutLength::TIMEOUT_NORMAL) + 1;
84 const int32_t MSG_VHAL_HEALTH_CHECK = MSG_VHAL_WATCHDOG_ALIVE + 1;
85
86 // VHAL sends heart beat every 3s. Car watchdog checks if there is the latest heart beat from VHAL
87 // with 1s marginal time.
88 constexpr std::chrono::nanoseconds kVhalHealthCheckDelayNs = 4s;
89 constexpr int64_t kVhalHeartBeatIntervalMs = 3000;
90
91 constexpr const char kServiceName[] = "WatchdogProcessService";
92 constexpr const char kVhalInterfaceName[] = "android.hardware.automotive.vehicle@2.0::IVehicle";
93
timeoutToDurationNs(const TimeoutLength & timeout)94 std::chrono::nanoseconds timeoutToDurationNs(const TimeoutLength& timeout) {
95 switch (timeout) {
96 case TimeoutLength::TIMEOUT_CRITICAL:
97 return 3s; // 3s and no buffer time.
98 case TimeoutLength::TIMEOUT_MODERATE:
99 return 6s; // 5s + 1s as buffer time.
100 case TimeoutLength::TIMEOUT_NORMAL:
101 return 12s; // 10s + 2s as buffer time.
102 }
103 }
104
// Formats the given process IDs as a comma-separated list such as "12, 345".
// Returns an empty string when `pids` is empty.
std::string pidArrayToString(const std::vector<int32_t>& pids) {
    std::string buffer;
    const char* separator = "";
    for (const int32_t pid : pids) {
        buffer += separator;
        buffer += std::to_string(pid);
        // Every element after the first one is preceded by the separator.
        separator = ", ";
    }
    return buffer;
}
118
isSystemShuttingDown()119 bool isSystemShuttingDown() {
120 std::string sysPowerCtl;
121 std::istringstream tokenStream(GetProperty("sys.powerctl", ""));
122 std::getline(tokenStream, sysPowerCtl, ',');
123 return sysPowerCtl == "reboot" || sysPowerCtl == "shutdown";
124 }
125
126 } // namespace
127
WatchdogProcessService(const sp<Looper> & handlerLooper)128 WatchdogProcessService::WatchdogProcessService(const sp<Looper>& handlerLooper) :
129 mHandlerLooper(handlerLooper),
130 mIsEnabled(true),
131 mLastSessionId(0),
132 mServiceStarted(false),
133 mVhalService(nullptr) {
134 mMessageHandler = sp<MessageHandlerImpl>::make(this);
135 mBinderDeathRecipient = sp<BinderDeathRecipient>::make(this);
136 mHidlDeathRecipient = sp<HidlDeathRecipient>::make(this);
137 mPropertyChangeListener = sp<PropertyChangeListener>::make(this);
138 for (const auto& timeout : kTimeouts) {
139 mClients.insert(std::make_pair(timeout, std::vector<ClientInfo>()));
140 mPingedClients.insert(std::make_pair(timeout, PingedClientMap()));
141 }
142 }
registerWatchdogServiceHelper(const sp<IWatchdogServiceHelper> & helper)143 Result<void> WatchdogProcessService::registerWatchdogServiceHelper(
144 const sp<IWatchdogServiceHelper>& helper) {
145 if (helper == nullptr) {
146 return Error() << "Must provide a non-null watchdog service helper instance";
147 }
148 Mutex::Autolock lock(mMutex);
149 mWatchdogServiceHelper = helper;
150 return {};
151 }
152
registerClient(const sp<ICarWatchdogClient> & client,TimeoutLength timeout)153 Status WatchdogProcessService::registerClient(const sp<ICarWatchdogClient>& client,
154 TimeoutLength timeout) {
155 pid_t callingPid = IPCThreadState::self()->getCallingPid();
156 uid_t callingUid = IPCThreadState::self()->getCallingUid();
157 ClientInfo clientInfo(client, callingPid, callingUid);
158
159 Mutex::Autolock lock(mMutex);
160 return registerClientLocked(clientInfo, timeout);
161 }
162
unregisterClient(const sp<ICarWatchdogClient> & client)163 Status WatchdogProcessService::unregisterClient(const sp<ICarWatchdogClient>& client) {
164 Mutex::Autolock lock(mMutex);
165 sp<IBinder> binder = BnCarWatchdogClient::asBinder(client);
166 // kTimeouts is declared as global static constant to cover all kinds of timeout (CRITICAL,
167 // MODERATE, NORMAL).
168 return unregisterClientLocked(kTimeouts, binder, ClientType::Regular);
169 }
170
registerCarWatchdogService(const sp<IBinder> & binder)171 Status WatchdogProcessService::registerCarWatchdogService(const sp<IBinder>& binder) {
172 pid_t callingPid = IPCThreadState::self()->getCallingPid();
173 uid_t callingUid = IPCThreadState::self()->getCallingUid();
174
175 Mutex::Autolock lock(mMutex);
176 if (mWatchdogServiceHelper == nullptr) {
177 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE,
178 "Watchdog service helper instance is null");
179 }
180 ClientInfo clientInfo(mWatchdogServiceHelper, binder, callingPid, callingUid);
181 return registerClientLocked(clientInfo, TimeoutLength::TIMEOUT_CRITICAL);
182 }
183
unregisterCarWatchdogService(const sp<IBinder> & binder)184 void WatchdogProcessService::unregisterCarWatchdogService(const sp<IBinder>& binder) {
185 Mutex::Autolock lock(mMutex);
186
187 std::vector<TimeoutLength> timeouts = {TimeoutLength::TIMEOUT_CRITICAL};
188 unregisterClientLocked(timeouts, binder, ClientType::Service);
189 }
190
registerMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)191 Status WatchdogProcessService::registerMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
192 Mutex::Autolock lock(mMutex);
193 sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
194 if (mMonitor != nullptr && binder == aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
195 return Status::ok();
196 }
197 status_t ret = binder->linkToDeath(mBinderDeathRecipient);
198 if (ret != OK) {
199 ALOGW("Failed to register the monitor as it is dead.");
200 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, "The monitor is dead.");
201 }
202 mMonitor = monitor;
203 if (DEBUG) {
204 ALOGD("Car watchdog monitor is registered");
205 }
206 return Status::ok();
207 }
208
unregisterMonitor(const sp<aawi::ICarWatchdogMonitor> & monitor)209 Status WatchdogProcessService::unregisterMonitor(const sp<aawi::ICarWatchdogMonitor>& monitor) {
210 Mutex::Autolock lock(mMutex);
211 sp<IBinder> curBinder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
212 sp<IBinder> newBinder = aawi::BnCarWatchdogMonitor::asBinder(monitor);
213 if (curBinder != newBinder) {
214 ALOGW("Failed to unregister the monitor as it has not been registered.");
215 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
216 "The monitor has not been registered.");
217 }
218 curBinder->unlinkToDeath(mBinderDeathRecipient);
219 mMonitor = nullptr;
220 if (DEBUG) {
221 ALOGD("Car watchdog monitor is unregistered");
222 }
223 return Status::ok();
224 }
225
tellClientAlive(const sp<ICarWatchdogClient> & client,int32_t sessionId)226 Status WatchdogProcessService::tellClientAlive(const sp<ICarWatchdogClient>& client,
227 int32_t sessionId) {
228 Mutex::Autolock lock(mMutex);
229 return tellClientAliveLocked(BnCarWatchdogClient::asBinder(client), sessionId);
230 }
231
tellCarWatchdogServiceAlive(const sp<ICarWatchdogServiceForSystem> & service,const std::vector<int32_t> & clientsNotResponding,int32_t sessionId)232 Status WatchdogProcessService::tellCarWatchdogServiceAlive(
233 const sp<ICarWatchdogServiceForSystem>& service,
234 const std::vector<int32_t>& clientsNotResponding, int32_t sessionId) {
235 Status status;
236 {
237 Mutex::Autolock lock(mMutex);
238 if (DEBUG) {
239 std::string buffer;
240 int size = clientsNotResponding.size();
241 if (size != 0) {
242 StringAppendF(&buffer, "%d", clientsNotResponding[0]);
243 for (int i = 1; i < clientsNotResponding.size(); i++) {
244 StringAppendF(&buffer, ", %d", clientsNotResponding[i]);
245 }
246 ALOGD("CarWatchdogService(session: %d) responded with non-responding clients: %s",
247 sessionId, buffer.c_str());
248 }
249 }
250 status = tellClientAliveLocked(BnCarWatchdogServiceForSystem::asBinder(service), sessionId);
251 }
252 if (status.isOk()) {
253 dumpAndKillAllProcesses(clientsNotResponding, true);
254 }
255 return status;
256 }
257
tellDumpFinished(const sp<aawi::ICarWatchdogMonitor> & monitor,int32_t pid)258 Status WatchdogProcessService::tellDumpFinished(const sp<aawi::ICarWatchdogMonitor>& monitor,
259 int32_t pid) {
260 Mutex::Autolock lock(mMutex);
261 if (mMonitor == nullptr || monitor == nullptr ||
262 aawi::BnCarWatchdogMonitor::asBinder(monitor) !=
263 aawi::BnCarWatchdogMonitor::asBinder(mMonitor)) {
264 return Status::
265 fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
266 "The monitor is not registered or an invalid monitor is given");
267 }
268 ALOGI("Process(pid: %d) has been dumped and killed", pid);
269 return Status::ok();
270 }
271
setEnabled(bool isEnabled)272 void WatchdogProcessService::setEnabled(bool isEnabled) {
273 Mutex::Autolock lock(mMutex);
274 if (mIsEnabled != isEnabled) {
275 ALOGI("%s is %s", kServiceName, isEnabled ? "enabled" : "disabled");
276 }
277 mIsEnabled = isEnabled;
278 if (mIsEnabled) {
279 for (const auto& timeout : kTimeouts) {
280 startHealthCheckingLocked(timeout);
281 }
282 }
283 }
284
notifyUserStateChange(userid_t userId,aawi::UserState state)285 Status WatchdogProcessService::notifyUserStateChange(userid_t userId, aawi::UserState state) {
286 std::string buffer;
287 Mutex::Autolock lock(mMutex);
288 switch (state) {
289 case aawi::UserState::USER_STATE_STARTED:
290 mStoppedUserIds.erase(userId);
291 buffer = StringPrintf("user(%d) is started", userId);
292 break;
293 case aawi::UserState::USER_STATE_STOPPED:
294 mStoppedUserIds.insert(userId);
295 buffer = StringPrintf("user(%d) is stopped", userId);
296 break;
297 default:
298 ALOGW("Unsupported user state: %d", state);
299 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, "Unsupported user state");
300 }
301 ALOGI("Received user state change: %s", buffer.c_str());
302 return Status::ok();
303 }
304
dump(int fd,const Vector<String16> &)305 Result<void> WatchdogProcessService::dump(int fd, const Vector<String16>& /*args*/) {
306 Mutex::Autolock lock(mMutex);
307 const char* indent = " ";
308 const char* doubleIndent = " ";
309 std::string buffer;
310 WriteStringToFd("CAR WATCHDOG PROCESS SERVICE\n", fd);
311 WriteStringToFd(StringPrintf("%s%s enabled: %s\n", indent, kServiceName,
312 mIsEnabled ? "true" : "false"),
313 fd);
314 WriteStringToFd(StringPrintf("%sRegistered clients\n", indent), fd);
315 int count = 1;
316 for (const auto& timeout : kTimeouts) {
317 std::vector<ClientInfo>& clients = mClients[timeout];
318 for (auto it = clients.begin(); it != clients.end(); it++, count++) {
319 WriteStringToFd(StringPrintf("%sClient #%d: %s\n", doubleIndent, count,
320 it->toString().c_str()),
321 fd);
322 }
323 }
324 WriteStringToFd(StringPrintf("%sMonitor registered: %s\n", indent,
325 mMonitor == nullptr ? "false" : "true"),
326 fd);
327 WriteStringToFd(StringPrintf("%sisSystemShuttingDown: %s\n", indent,
328 isSystemShuttingDown() ? "true" : "false"),
329 fd);
330 buffer = "none";
331 bool first = true;
332 for (const auto& userId : mStoppedUserIds) {
333 if (first) {
334 buffer = StringPrintf("%d", userId);
335 first = false;
336 } else {
337 StringAppendF(&buffer, ", %d", userId);
338 }
339 }
340 WriteStringToFd(StringPrintf("%sStopped users: %s\n", indent, buffer.c_str()), fd);
341 return {};
342 }
343
// Runs one health check round for the timeout category encoded in `what`:
//  1. dumps and kills the clients that did not answer the previous ping,
//  2. pings the registered clients whose user is not stopped, and
//  3. schedules the next round while clients remain registered.
void WatchdogProcessService::doHealthCheck(int what) {
    mHandlerLooper->removeMessages(mMessageHandler, what);
    if (Mutex::Autolock lock(mMutex); !mIsEnabled) {
        return;
    }
    const TimeoutLength timeout = static_cast<TimeoutLength>(what);
    dumpAndKillClientsIfNotResponding(timeout);

    /* Generates a temporary/local vector containing the clients to ping.
     * Using a local copy may send unnecessary ping messages to clients after they are unregistered.
     * Clients should be able to handle them.
     */
    std::vector<ClientInfo> clientsToPing;
    bool hasRegisteredClients = false;
    {
        Mutex::Autolock lock(mMutex);
        PingedClientMap& pingedClients = mPingedClients[timeout];
        pingedClients.clear();
        const std::vector<ClientInfo>& registeredClients = mClients[timeout];
        hasRegisteredClients = !registeredClients.empty();
        clientsToPing.reserve(registeredClients.size());
        for (const ClientInfo& registeredClient : registeredClients) {
            // Clients of stopped users are neither pinged nor tracked: pinging them would hand
            // out a session ID that was never recorded in pingedClients.
            if (mStoppedUserIds.count(registeredClient.userId) > 0) {
                continue;
            }
            ClientInfo clientInfo = registeredClient;
            clientInfo.sessionId = getNewSessionId();
            pingedClients.insert(std::make_pair(clientInfo.sessionId, clientInfo));
            clientsToPing.push_back(clientInfo);
        }
    }

    // Pings are sent without holding the lock.
    for (const auto& clientInfo : clientsToPing) {
        Status status = clientInfo.checkIfAlive(timeout);
        if (!status.isOk()) {
            ALOGW("Sending a ping message to client(pid: %d) failed: %s", clientInfo.pid,
                  status.exceptionMessage().c_str());
            // A client that could not be pinged is not expected to answer.
            Mutex::Autolock lock(mMutex);
            mPingedClients[timeout].erase(clientInfo.sessionId);
        }
    }
    // Though the number of pinged clients is a more specific measure, the presence of registered
    // clients is used as a conservative approach so that health checking keeps running while all
    // clients belong to stopped users.
    if (hasRegisteredClients) {
        auto durationNs = timeoutToDurationNs(timeout);
        mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
    }
}
390
start()391 Result<void> WatchdogProcessService::start() {
392 if (mServiceStarted) {
393 return Error(INVALID_OPERATION) << "Cannot start process monitoring more than once";
394 }
395 mServiceStarted = true;
396 reportWatchdogAliveToVhal();
397 return {};
398 }
399
// Stops the service: drops every registered client, the watchdog service helper and the monitor,
// removes the death links this service created and marks the service as not started.
void WatchdogProcessService::terminate() {
    Mutex::Autolock lock(mMutex);
    for (const auto& timeout : kTimeouts) {
        std::vector<ClientInfo>& clients = mClients[timeout];
        for (const ClientInfo& clientInfo : clients) {
            clientInfo.unlinkToDeath(mBinderDeathRecipient);
        }
        clients.clear();
    }
    mWatchdogServiceHelper.clear();
    if (mMonitor != nullptr) {
        sp<IBinder> binder = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
        binder->unlinkToDeath(mBinderDeathRecipient);
        // Drop the monitor as well: once unlinked, its death would go unnoticed and a stale
        // monitor would stay registered.
        mMonitor = nullptr;
    }
    if (mVhalService != nullptr) {
        mVhalService->unlinkToDeath(mHidlDeathRecipient);
    }
    mServiceStarted = false;
}
419
registerClientLocked(const ClientInfo & clientInfo,TimeoutLength timeout)420 Status WatchdogProcessService::registerClientLocked(const ClientInfo& clientInfo,
421 TimeoutLength timeout) {
422 if (findClientAndProcessLocked(kTimeouts, clientInfo, nullptr)) {
423 ALOGW("Failed to register (%s) as it is already registered.",
424 clientInfo.toString().c_str());
425 return Status::ok();
426 }
427 status_t status = clientInfo.linkToDeath(mBinderDeathRecipient);
428 if (status != OK) {
429 ALOGW("Failed to register (%s) as it is dead", clientInfo.toString().c_str());
430 std::string errorStr = StringPrintf("(%s) is dead", clientInfo.toString().c_str());
431 return Status::fromExceptionCode(Status::EX_ILLEGAL_STATE, errorStr.c_str());
432 }
433 std::vector<ClientInfo>& clients = mClients[timeout];
434 clients.emplace_back(clientInfo);
435
436 // If the client array becomes non-empty, start health checking.
437 if (clients.size() == 1) {
438 startHealthCheckingLocked(timeout);
439 }
440 if (DEBUG) {
441 ALOGD("Car watchdog client (%s, timeout = %d) is registered", clientInfo.toString().c_str(),
442 timeout);
443 }
444 return Status::ok();
445 }
446
unregisterClientLocked(const std::vector<TimeoutLength> & timeouts,sp<IBinder> binder,ClientType clientType)447 Status WatchdogProcessService::unregisterClientLocked(const std::vector<TimeoutLength>& timeouts,
448 sp<IBinder> binder, ClientType clientType) {
449 const char* clientName = clientType == ClientType::Regular ? "client" : "watchdog service";
450 bool result = findClientAndProcessLocked(timeouts, binder,
451 [&](std::vector<ClientInfo>& clients,
452 std::vector<ClientInfo>::const_iterator it) {
453 it->unlinkToDeath(mBinderDeathRecipient);
454 clients.erase(it);
455 });
456 if (!result) {
457 std::string errorStr = StringPrintf("The %s has not been registered", clientName);
458 const char* errorCause = errorStr.c_str();
459 ALOGW("Failed to unregister the %s: %s", clientName, errorCause);
460 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT, errorCause);
461 }
462 if (DEBUG) {
463 ALOGD("Car watchdog %s is unregistered", clientName);
464 }
465 return Status::ok();
466 }
467
tellClientAliveLocked(const sp<IBinder> & binder,int32_t sessionId)468 Status WatchdogProcessService::tellClientAliveLocked(const sp<IBinder>& binder, int32_t sessionId) {
469 for (const auto& timeout : kTimeouts) {
470 PingedClientMap& clients = mPingedClients[timeout];
471 PingedClientMap::const_iterator it = clients.find(sessionId);
472 if (it == clients.cend() || !it->second.matchesBinder(binder)) {
473 continue;
474 }
475 clients.erase(it);
476 return Status::ok();
477 }
478 return Status::fromExceptionCode(Status::EX_ILLEGAL_ARGUMENT,
479 "The client is not registered or the session ID is not found");
480 }
481
// Looks for the first registered client equal to `clientInfo` within the given timeout
// categories. When one is found, `processor` (if not null) is invoked with the owning vector and
// an iterator to the match, and true is returned. Must be called with mMutex held.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const ClientInfo& clientInfo,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto match = candidates.begin();
        while (match != candidates.end() && std::as_const(*match) != clientInfo) {
            ++match;
        }
        if (match == candidates.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(candidates, match);
        }
        return true;
    }
    return false;
}
499
// Looks for the first registered client matching `binder` within the given timeout categories.
// When one is found, `processor` (if not null) is invoked with the owning vector and an iterator
// to the match, and true is returned. Must be called with mMutex held.
bool WatchdogProcessService::findClientAndProcessLocked(const std::vector<TimeoutLength> timeouts,
                                                        const sp<IBinder> binder,
                                                        const Processor& processor) {
    for (const auto& timeout : timeouts) {
        std::vector<ClientInfo>& candidates = mClients[timeout];
        auto match = candidates.begin();
        while (match != candidates.end() && !match->matchesBinder(binder)) {
            ++match;
        }
        if (match == candidates.end()) {
            continue;
        }
        if (processor != nullptr) {
            processor(candidates, match);
        }
        return true;
    }
    return false;
}
517
startHealthCheckingLocked(TimeoutLength timeout)518 Result<void> WatchdogProcessService::startHealthCheckingLocked(TimeoutLength timeout) {
519 PingedClientMap& clients = mPingedClients[timeout];
520 clients.clear();
521 int what = static_cast<int>(timeout);
522 auto durationNs = timeoutToDurationNs(timeout);
523 mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler, Message(what));
524 return {};
525 }
526
dumpAndKillClientsIfNotResponding(TimeoutLength timeout)527 Result<void> WatchdogProcessService::dumpAndKillClientsIfNotResponding(TimeoutLength timeout) {
528 std::vector<int32_t> processIds;
529 std::vector<const ClientInfo*> clientsToNotify;
530 {
531 Mutex::Autolock lock(mMutex);
532 PingedClientMap& clients = mPingedClients[timeout];
533 for (PingedClientMap::const_iterator it = clients.cbegin(); it != clients.cend(); it++) {
534 pid_t pid = -1;
535 userid_t userId = -1;
536 std::vector<TimeoutLength> timeouts = {timeout};
537 findClientAndProcessLocked(timeouts, it->second,
538 [&](std::vector<ClientInfo>& cachedClients,
539 std::vector<ClientInfo>::const_iterator
540 cachedClientsIt) {
541 pid = cachedClientsIt->pid;
542 userId = cachedClientsIt->userId;
543 cachedClients.erase(cachedClientsIt);
544 });
545 if (pid != -1 && mStoppedUserIds.count(userId) == 0) {
546 clientsToNotify.emplace_back(&it->second);
547 processIds.push_back(pid);
548 }
549 }
550 }
551 for (const ClientInfo*& clientInfo : clientsToNotify) {
552 clientInfo->prepareProcessTermination();
553 }
554 return dumpAndKillAllProcesses(processIds, true);
555 }
556
dumpAndKillAllProcesses(const std::vector<int32_t> & processesNotResponding,bool reportToVhal)557 Result<void> WatchdogProcessService::dumpAndKillAllProcesses(
558 const std::vector<int32_t>& processesNotResponding, bool reportToVhal) {
559 size_t size = processesNotResponding.size();
560 if (size == 0) {
561 return {};
562 }
563 std::string pidString = pidArrayToString(processesNotResponding);
564 sp<aawi::ICarWatchdogMonitor> monitor;
565 {
566 Mutex::Autolock lock(mMutex);
567 if (mMonitor == nullptr) {
568 std::string errorMsg =
569 StringPrintf("Failed to dump and kill processes(pid = %s): Monitor is not set",
570 pidString.c_str());
571 ALOGW("%s", errorMsg.c_str());
572 return Error() << errorMsg;
573 }
574 monitor = mMonitor;
575 }
576 if (isSystemShuttingDown()) {
577 ALOGI("Skip dumping and killing processes(%s): The system is shutting down",
578 pidString.c_str());
579 return {};
580 }
581 if (reportToVhal) {
582 reportTerminatedProcessToVhal(processesNotResponding);
583 }
584 monitor->onClientsNotResponding(processesNotResponding);
585 if (DEBUG) {
586 ALOGD("Dumping and killing processes is requested: %s", pidString.c_str());
587 }
588 return {};
589 }
590
591 // Handle when car watchdog clients die.
handleBinderDeath(const wp<IBinder> & who)592 void WatchdogProcessService::handleBinderDeath(const wp<IBinder>& who) {
593 Mutex::Autolock lock(mMutex);
594 IBinder* binder = who.unsafe_get();
595 // Check if dead binder is monitor.
596 sp<IBinder> monitor = aawi::BnCarWatchdogMonitor::asBinder(mMonitor);
597 if (monitor == binder) {
598 mMonitor = nullptr;
599 ALOGW("The monitor has died.");
600 return;
601 }
602 findClientAndProcessLocked(kTimeouts, binder,
603 [&](std::vector<ClientInfo>& clients,
604 std::vector<ClientInfo>::const_iterator it) {
605 ALOGW("Client(pid: %d) died", it->pid);
606 clients.erase(it);
607 });
608 }
609
610 // Handle when VHAL dies.
handleHidlDeath(const wp<IBase> & who)611 void WatchdogProcessService::handleHidlDeath(const wp<IBase>& who) {
612 Mutex::Autolock lock(mMutex);
613 if (!interfacesEqual(mVhalService, who.promote())) {
614 return;
615 }
616 ALOGW("VHAL has died.");
617 mVhalService->unlinkToDeath(mHidlDeathRecipient);
618 mVhalService = nullptr;
619 }
620
reportWatchdogAliveToVhal()621 void WatchdogProcessService::reportWatchdogAliveToVhal() {
622 if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_ALIVE) > 0) {
623 ALOGW("VHAL doesn't support WATCHDOG_ALIVE. Car watchdog will not update WATCHDOG_ALIVE.");
624 return;
625 }
626 int64_t systemUptime = uptimeMillis();
627 VehiclePropValue propValue{
628 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_ALIVE),
629 .status = VehiclePropertyStatus::AVAILABLE,
630 .value = {.int64Values = {systemUptime}},
631 };
632 const auto& ret = updateVhal(propValue);
633 if (!ret.ok()) {
634 ALOGW("Failed to update WATCHDOG_ALIVE VHAL property. Will try again in 3s");
635 }
636 // Update VHAL with the interval of TIMEOUT_CRITICAL(3s).
637 auto durationNs = timeoutToDurationNs(TimeoutLength::TIMEOUT_CRITICAL);
638 mHandlerLooper->removeMessages(mMessageHandler, MSG_VHAL_WATCHDOG_ALIVE);
639 mHandlerLooper->sendMessageDelayed(durationNs.count(), mMessageHandler,
640 Message(MSG_VHAL_WATCHDOG_ALIVE));
641 }
642
reportTerminatedProcessToVhal(const std::vector<int32_t> & processesNotResponding)643 void WatchdogProcessService::reportTerminatedProcessToVhal(
644 const std::vector<int32_t>& processesNotResponding) {
645 if (mNotSupportedVhalProperties.count(VehicleProperty::WATCHDOG_TERMINATED_PROCESS) > 0) {
646 ALOGW("VHAL doesn't support WATCHDOG_TERMINATED_PROCESS. Terminated process is not "
647 "reported to VHAL.");
648 return;
649 }
650 for (auto&& pid : processesNotResponding) {
651 const auto& retCmdLine = readProcCmdLine(pid);
652 if (!retCmdLine.ok()) {
653 ALOGW("Failed to get process command line for pid(%d): %s", pid,
654 retCmdLine.error().message().c_str());
655 continue;
656 }
657 std::string procCmdLine = retCmdLine.value();
658 VehiclePropValue propValue{
659 .prop = static_cast<int32_t>(VehicleProperty::WATCHDOG_TERMINATED_PROCESS),
660 .status = VehiclePropertyStatus::AVAILABLE,
661 .value = {
662 .int32Values = {static_cast<int32_t>(
663 ProcessTerminationReason::NOT_RESPONDING)},
664 .stringValue = procCmdLine,
665 },
666 };
667 const auto& retUpdate = updateVhal(propValue);
668 if (!retUpdate.ok()) {
669 ALOGW("Failed to update WATCHDOG_TERMINATED_PROCESS VHAL property(command line: %s)",
670 procCmdLine.c_str());
671 }
672 }
673 }
674
updateVhal(const VehiclePropValue & value)675 Result<void> WatchdogProcessService::updateVhal(const VehiclePropValue& value) {
676 Mutex::Autolock lock(mMutex);
677 const auto& connectRet = connectToVhalLocked();
678 if (!connectRet.ok()) {
679 std::string errorMsg = "VHAL is not connected: " + connectRet.error().message();
680 ALOGW("%s", errorMsg.c_str());
681 return Error() << errorMsg;
682 }
683 if (mNotSupportedVhalProperties.count(static_cast<VehicleProperty>(value.prop)) > 0) {
684 std::string errorMsg = StringPrintf("VHAL doesn't support property(id: %d)", value.prop);
685 ALOGW("%s", errorMsg.c_str());
686 return Error() << errorMsg;
687 }
688 const auto& updateRet = mVhalService->set(value);
689 if (updateRet.isOk() && updateRet == StatusCode::OK) {
690 return {};
691 }
692 return Error() << "Failed to set propValue(" << value.prop << ") to VHAL";
693 }
694
readProcCmdLine(int32_t pid)695 Result<std::string> WatchdogProcessService::readProcCmdLine(int32_t pid) {
696 std::string cmdLinePath = StringPrintf("/proc/%d/cmdline", pid);
697 std::string procCmdLine;
698 if (ReadFileToString(cmdLinePath, &procCmdLine)) {
699 std::replace(procCmdLine.begin(), procCmdLine.end(), '\0', ' ');
700 procCmdLine = Trim(procCmdLine);
701 return procCmdLine;
702 }
703 return Error() << "Failed to read " << cmdLinePath;
704 }
705
connectToVhalLocked()706 Result<void> WatchdogProcessService::connectToVhalLocked() {
707 if (mVhalService.get() != nullptr) {
708 return {};
709 }
710 mVhalService = IVehicle::tryGetService();
711 if (mVhalService.get() == nullptr) {
712 return Error() << "Failed to connect to VHAL.";
713 }
714 mVhalService->linkToDeath(mHidlDeathRecipient, /*cookie=*/0);
715 queryVhalPropertiesLocked();
716 subscribeToVhalHeartBeatLocked();
717 ALOGI("Successfully connected to VHAL.");
718 return {};
719 }
720
queryVhalPropertiesLocked()721 void WatchdogProcessService::queryVhalPropertiesLocked() {
722 mNotSupportedVhalProperties.clear();
723 std::vector<VehicleProperty> propIds = {VehicleProperty::WATCHDOG_ALIVE,
724 VehicleProperty::WATCHDOG_TERMINATED_PROCESS,
725 VehicleProperty::VHAL_HEARTBEAT};
726 for (const auto& propId : propIds) {
727 if (!isVhalPropertySupportedLocked(propId)) {
728 mNotSupportedVhalProperties.insert(propId);
729 }
730 }
731 }
732
isVhalPropertySupportedLocked(VehicleProperty propId)733 bool WatchdogProcessService::isVhalPropertySupportedLocked(VehicleProperty propId) {
734 StatusCode status;
735 hidl_vec<int32_t> props = {static_cast<int32_t>(propId)};
736 mVhalService->getPropConfigs(props,
737 [&status](StatusCode s,
738 hidl_vec<VehiclePropConfig> /*propConfigs*/) {
739 status = s;
740 });
741 return status == StatusCode::OK;
742 }
743
subscribeToVhalHeartBeatLocked()744 void WatchdogProcessService::subscribeToVhalHeartBeatLocked() {
745 if (mNotSupportedVhalProperties.count(VehicleProperty::VHAL_HEARTBEAT) > 0) {
746 ALOGW("VHAL doesn't support VHAL_HEARTBEAT. Checking VHAL health is disabled.");
747 return;
748 }
749
750 mVhalHeartBeat = {
751 .eventTime = 0,
752 .value = 0,
753 };
754
755 SubscribeOptions reqVhalProperties[] = {
756 {.propId = static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT),
757 .flags = SubscribeFlags::EVENTS_FROM_CAR},
758 };
759 hidl_vec<SubscribeOptions> options;
760 options.setToExternal(reqVhalProperties, arraysize(reqVhalProperties));
761 StatusCode status = mVhalService->subscribe(mPropertyChangeListener, options);
762 if (status != StatusCode::OK) {
763 ALOGW("Failed to subscribe to VHAL_HEARTBEAT. Checking VHAL health is disabled.");
764 return;
765 }
766 mHandlerLooper->sendMessageDelayed(kVhalHealthCheckDelayNs.count(), mMessageHandler,
767 Message(MSG_VHAL_HEALTH_CHECK));
768 }
769
getNewSessionId()770 int32_t WatchdogProcessService::getNewSessionId() {
771 // Make sure that session id is always positive number.
772 if (++mLastSessionId <= 0) {
773 mLastSessionId = 1;
774 }
775 return mLastSessionId;
776 }
777
updateVhalHeartBeat(int64_t value)778 void WatchdogProcessService::updateVhalHeartBeat(int64_t value) {
779 bool wrongHeartBeat;
780 {
781 Mutex::Autolock lock(mMutex);
782 wrongHeartBeat = value <= mVhalHeartBeat.value;
783 mVhalHeartBeat.eventTime = uptimeMillis();
784 mVhalHeartBeat.value = value;
785 }
786 if (wrongHeartBeat) {
787 ALOGW("VHAL updated heart beat with a wrong value. Terminating VHAL...");
788 terminateVhal();
789 return;
790 }
791 mHandlerLooper->sendMessageDelayed(kVhalHealthCheckDelayNs.count(), mMessageHandler,
792 Message(MSG_VHAL_HEALTH_CHECK));
793 }
794
checkVhalHealth()795 void WatchdogProcessService::checkVhalHealth() {
796 int64_t lastEventTime;
797 int64_t currentUptime = uptimeMillis();
798 {
799 Mutex::Autolock lock(mMutex);
800 lastEventTime = mVhalHeartBeat.eventTime;
801 }
802 if (currentUptime > lastEventTime + kVhalHeartBeatIntervalMs) {
803 ALOGW("VHAL failed to update heart beat within timeout. Terminating VHAL...");
804 terminateVhal();
805 }
806 }
807
terminateVhal()808 void WatchdogProcessService::terminateVhal() {
809 using ::android::hidl::manager::V1_0::IServiceManager;
810
811 std::vector<int32_t> processIds;
812 sp<IServiceManager> manager = IServiceManager::getService();
813 Return<void> ret = manager->debugDump([&](auto& hals) {
814 for (const auto& info : hals) {
815 if (info.pid == static_cast<int>(IServiceManager::PidConstant::NO_PID)) {
816 continue;
817 }
818 if (info.interfaceName == kVhalInterfaceName) {
819 processIds.push_back(info.pid);
820 break;
821 }
822 }
823 });
824
825 if (!ret.isOk()) {
826 ALOGE("Failed to terminate VHAL: could not get VHAL process id");
827 return;
828 } else if (processIds.empty()) {
829 ALOGE("Failed to terminate VHAL: VHAL is not running");
830 return;
831 }
832 dumpAndKillAllProcesses(processIds, false);
833 }
834
toString() const835 std::string WatchdogProcessService::ClientInfo::toString() const {
836 std::string buffer;
837 StringAppendF(&buffer, "pid = %d, userId = %d, type = %s", pid, userId,
838 type == ClientType::Regular ? "regular" : "watchdog service");
839 return buffer;
840 }
841
getBinder() const842 sp<IBinder> WatchdogProcessService::ClientInfo::getBinder() const {
843 if (type == ClientType::Regular) {
844 return BnCarWatchdogClient::asBinder(client);
845 }
846 return watchdogServiceBinder;
847 }
848
linkToDeath(const sp<IBinder::DeathRecipient> & recipient) const849 status_t WatchdogProcessService::ClientInfo::linkToDeath(
850 const sp<IBinder::DeathRecipient>& recipient) const {
851 if (type == ClientType::Regular) {
852 return BnCarWatchdogClient::asBinder(client)->linkToDeath(recipient);
853 }
854 // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
855 // skip this step.
856 return OK;
857 }
858
unlinkToDeath(const wp<IBinder::DeathRecipient> & recipient) const859 status_t WatchdogProcessService::ClientInfo::unlinkToDeath(
860 const wp<IBinder::DeathRecipient>& recipient) const {
861 if (type == ClientType::Regular) {
862 return BnCarWatchdogClient::asBinder(client)->unlinkToDeath(recipient);
863 }
864 // WatchdogServiceHelper is the binder death recipient for watchdog service, ergo
865 // skip this step.
866 return OK;
867 }
868
checkIfAlive(TimeoutLength timeout) const869 Status WatchdogProcessService::ClientInfo::checkIfAlive(TimeoutLength timeout) const {
870 if (type == ClientType::Regular) {
871 return client->checkIfAlive(sessionId, timeout);
872 }
873 return watchdogServiceHelper->checkIfAlive(watchdogServiceBinder, sessionId, timeout);
874 }
875
prepareProcessTermination() const876 Status WatchdogProcessService::ClientInfo::prepareProcessTermination() const {
877 if (type == ClientType::Regular) {
878 return client->prepareProcessTermination();
879 }
880 return watchdogServiceHelper->prepareProcessTermination(watchdogServiceBinder);
881 }
882
BinderDeathRecipient(const sp<WatchdogProcessService> & service)883 WatchdogProcessService::BinderDeathRecipient::BinderDeathRecipient(
884 const sp<WatchdogProcessService>& service) :
885 mService(service) {}
886
binderDied(const wp<IBinder> & who)887 void WatchdogProcessService::BinderDeathRecipient::binderDied(const wp<IBinder>& who) {
888 mService->handleBinderDeath(who);
889 }
890
HidlDeathRecipient(const sp<WatchdogProcessService> & service)891 WatchdogProcessService::HidlDeathRecipient::HidlDeathRecipient(
892 const sp<WatchdogProcessService>& service) :
893 mService(service) {}
894
serviceDied(uint64_t,const wp<IBase> & who)895 void WatchdogProcessService::HidlDeathRecipient::serviceDied(uint64_t /*cookie*/,
896 const wp<IBase>& who) {
897 mService->handleHidlDeath(who);
898 }
899
PropertyChangeListener(const sp<WatchdogProcessService> & service)900 WatchdogProcessService::PropertyChangeListener::PropertyChangeListener(
901 const sp<WatchdogProcessService>& service) :
902 mService(service) {}
903
onPropertyEvent(const hidl_vec<VehiclePropValue> & propValues)904 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertyEvent(
905 const hidl_vec<VehiclePropValue>& propValues) {
906 for (const auto& value : propValues) {
907 if (value.prop == static_cast<int32_t>(VehicleProperty::VHAL_HEARTBEAT)) {
908 mService->updateVhalHeartBeat(value.value.int64Values[0]);
909 break;
910 }
911 }
912 return Return<void>();
913 }
914
onPropertySet(const VehiclePropValue &)915 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySet(
916 const VehiclePropValue& /*propValue*/) {
917 return Return<void>();
918 }
919
onPropertySetError(StatusCode,int32_t,int32_t)920 Return<void> WatchdogProcessService::PropertyChangeListener::onPropertySetError(
921 StatusCode /*status*/, int32_t /*propId*/, int32_t /*areaId*/) {
922 return Return<void>();
923 }
924
MessageHandlerImpl(const sp<WatchdogProcessService> & service)925 WatchdogProcessService::MessageHandlerImpl::MessageHandlerImpl(
926 const sp<WatchdogProcessService>& service) :
927 mService(service) {}
928
handleMessage(const Message & message)929 void WatchdogProcessService::MessageHandlerImpl::handleMessage(const Message& message) {
930 switch (message.what) {
931 case static_cast<int>(TimeoutLength::TIMEOUT_CRITICAL):
932 case static_cast<int>(TimeoutLength::TIMEOUT_MODERATE):
933 case static_cast<int>(TimeoutLength::TIMEOUT_NORMAL):
934 mService->doHealthCheck(message.what);
935 break;
936 case MSG_VHAL_WATCHDOG_ALIVE:
937 mService->reportWatchdogAliveToVhal();
938 break;
939 case MSG_VHAL_HEALTH_CHECK:
940 mService->checkVhalHealth();
941 break;
942 default:
943 ALOGW("Unknown message: %d", message.what);
944 }
945 }
946
947 } // namespace watchdog
948 } // namespace automotive
949 } // namespace android
950