• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.car.watchdog;
18 
19 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_CRITICAL;
20 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_MODERATE;
21 import static android.car.watchdog.CarWatchdogManager.TIMEOUT_NORMAL;
22 
23 import static com.android.car.internal.ExcludeFromCodeCoverageGeneratedReport.DUMP_INFO;
24 
25 import android.annotation.NonNull;
26 import android.annotation.UserIdInt;
27 import android.automotive.watchdog.internal.ICarWatchdogServiceForSystem;
28 import android.automotive.watchdog.internal.ProcessIdentifier;
29 import android.car.builtin.util.Slogf;
30 import android.car.watchdog.ICarWatchdogServiceCallback;
31 import android.car.watchdoglib.CarWatchdogDaemonHelper;
32 import android.os.Binder;
33 import android.os.Handler;
34 import android.os.IBinder;
35 import android.os.Looper;
36 import android.os.RemoteException;
37 import android.os.SystemClock;
38 import android.os.SystemProperties;
39 import android.util.SparseArray;
40 import android.util.SparseBooleanArray;
41 
42 import com.android.car.internal.ExcludeFromCodeCoverageGeneratedReport;
43 import com.android.car.internal.util.IndentingPrintWriter;
44 import com.android.internal.annotations.GuardedBy;
45 
46 import java.util.ArrayList;
47 import java.util.List;
48 import java.util.Optional;
49 
50 /**
51  * Handles clients' health status checking and reporting the statuses to the watchdog daemon.
52  */
53 public final class WatchdogProcessHandler {
54     static final String PROPERTY_RO_CLIENT_HEALTHCHECK_INTERVAL =
55             "ro.carwatchdog.client_healthcheck.interval";
56     static final int MISSING_INT_PROPERTY_VALUE = -1;
57 
58     private static final int[] ALL_TIMEOUTS =
59             { TIMEOUT_CRITICAL, TIMEOUT_MODERATE, TIMEOUT_NORMAL };
60 
61     private final ICarWatchdogServiceForSystem mWatchdogServiceForSystem;
62     private final CarWatchdogDaemonHelper mCarWatchdogDaemonHelper;
63     private final Handler mMainHandler = new Handler(Looper.getMainLooper());
64     private final Object mLock = new Object();
65     /*
66      * Keeps the list of car watchdog client according to timeout:
67      * key => timeout, value => ClientInfo list.
68      * The value of SparseArray is guarded by mLock.
69      */
70     @GuardedBy("mLock")
71     private final SparseArray<ArrayList<ClientInfo>> mClientMap = new SparseArray<>();
72     /*
73      * Keeps the map of car watchdog client being checked by CarWatchdogService according to
74      * timeout: key => timeout, value => ClientInfo map.
75      * The value is also a map: key => session id, value => ClientInfo.
76      */
77     @GuardedBy("mLock")
78     private final SparseArray<SparseArray<ClientInfo>> mPingedClientMap = new SparseArray<>();
79     /*
80      * Keeps whether client health checking is being performed according to timeout:
81      * key => timeout, value => boolean (whether client health checking is being performed).
82      * The value of SparseArray is guarded by mLock.
83      */
84     @GuardedBy("mLock")
85     private final SparseArray<Boolean> mClientCheckInProgress = new SparseArray<>();
86     @GuardedBy("mLock")
87     private final ArrayList<ClientInfo> mClientsNotResponding = new ArrayList<>();
88     // mLastSessionId should only be accessed from the main thread.
89     @GuardedBy("mLock")
90     private int mLastSessionId;
91     @GuardedBy("mLock")
92     private final SparseBooleanArray mStoppedUser = new SparseBooleanArray();
93 
94     private long mOverriddenClientHealthCheckWindowMs = MISSING_INT_PROPERTY_VALUE;
95 
WatchdogProcessHandler(ICarWatchdogServiceForSystem serviceImpl, CarWatchdogDaemonHelper daemonHelper)96     public WatchdogProcessHandler(ICarWatchdogServiceForSystem serviceImpl,
97             CarWatchdogDaemonHelper daemonHelper) {
98         mWatchdogServiceForSystem = serviceImpl;
99         mCarWatchdogDaemonHelper = daemonHelper;
100     }
101 
102     /** Initializes the handler. */
init()103     public void init() {
104         synchronized (mLock) {
105             for (int timeout : ALL_TIMEOUTS) {
106                 mClientMap.put(timeout, new ArrayList<ClientInfo>());
107                 mPingedClientMap.put(timeout, new SparseArray<ClientInfo>());
108                 mClientCheckInProgress.put(timeout, false);
109             }
110         }
111         // Overridden timeout value must be greater than  or equal to the maximum possible timeout
112         // value. Otherwise, clients will be pinged more frequently than the guaranteed timeout
113         // duration.
114         int clientHealthCheckWindowSec = SystemProperties.getInt(
115                 PROPERTY_RO_CLIENT_HEALTHCHECK_INTERVAL, MISSING_INT_PROPERTY_VALUE);
116         if (clientHealthCheckWindowSec != MISSING_INT_PROPERTY_VALUE) {
117             mOverriddenClientHealthCheckWindowMs = Math.max(clientHealthCheckWindowSec * 1000L,
118                     getTimeoutDurationMs(TIMEOUT_NORMAL));
119         }
120         if (CarWatchdogService.DEBUG) {
121             Slogf.d(CarWatchdogService.TAG, "WatchdogProcessHandler is initialized");
122         }
123     }
124 
125     /** Dumps its state. */
126     @ExcludeFromCodeCoverageGeneratedReport(reason = DUMP_INFO)
dump(IndentingPrintWriter writer)127     public void dump(IndentingPrintWriter writer) {
128         synchronized (mLock) {
129             writer.println("Registered clients");
130             writer.increaseIndent();
131             int count = 1;
132             for (int timeout : ALL_TIMEOUTS) {
133                 ArrayList<ClientInfo> clients = mClientMap.get(timeout);
134                 String timeoutStr = timeoutToString(timeout);
135                 for (ClientInfo clientInfo : clients) {
136                     writer.printf("client #%d: timeout = %s, pid = %d\n", count++, timeoutStr,
137                             clientInfo.pid);
138                 }
139             }
140             writer.printf("Stopped users: ");
141             int size = mStoppedUser.size();
142             if (size > 0) {
143                 writer.printf("%d", mStoppedUser.keyAt(0));
144                 for (int i = 1; i < size; i++) {
145                     writer.printf(", %d", mStoppedUser.keyAt(i));
146                 }
147                 writer.println();
148             } else {
149                 writer.println("none");
150             }
151             writer.decreaseIndent();
152         }
153     }
154 
155     /** Registers the client callback */
registerClient(ICarWatchdogServiceCallback client, int timeout)156     public void registerClient(ICarWatchdogServiceCallback client, int timeout) {
157         synchronized (mLock) {
158             ArrayList<ClientInfo> clients = mClientMap.get(timeout);
159             if (clients == null) {
160                 Slogf.w(CarWatchdogService.TAG, "Cannot register the client: invalid timeout");
161                 return;
162             }
163             IBinder binder = client.asBinder();
164             for (int i = 0; i < clients.size(); i++) {
165                 ClientInfo clientInfo = clients.get(i);
166                 if (binder == clientInfo.client.asBinder()) {
167                     Slogf.w(CarWatchdogService.TAG,
168                             "Cannot register the client: the client(pid: %d) has been already "
169                             + "registered", clientInfo.pid);
170                     return;
171                 }
172             }
173             int pid = Binder.getCallingPid();
174             int userId = Binder.getCallingUserHandle().getIdentifier();
175             ClientInfo clientInfo = new ClientInfo(client, pid, userId, timeout);
176             try {
177                 clientInfo.linkToDeath();
178             } catch (RemoteException e) {
179                 Slogf.w(CarWatchdogService.TAG,
180                         "Cannot register the client: linkToDeath to the client failed");
181                 return;
182             }
183             clients.add(clientInfo);
184             if (CarWatchdogService.DEBUG) {
185                 Slogf.d(CarWatchdogService.TAG, "Registered client: %s", clientInfo);
186             }
187         }
188     }
189 
190     /** Unregisters the previously registered client callback */
unregisterClient(ICarWatchdogServiceCallback client)191     public void unregisterClient(ICarWatchdogServiceCallback client) {
192         ClientInfo clientInfo;
193         synchronized (mLock) {
194             IBinder binder = client.asBinder();
195             // Even if a client did not respond to the latest ping, CarWatchdogService should honor
196             // the unregister request at this point and remove it from all internal caches.
197             // Otherwise, the client might be killed even after unregistering.
198             Optional<ClientInfo> optionalClientInfo = removeFromClientMapsLocked(binder);
199             if (optionalClientInfo.isEmpty()) {
200                 Slogf.w(CarWatchdogService.TAG,
201                         "Cannot unregister the client: the client has not been registered before");
202                 return;
203             }
204             clientInfo = optionalClientInfo.get();
205             for (int i = 0; i < mClientsNotResponding.size(); i++) {
206                 ClientInfo notRespondingClientInfo = mClientsNotResponding.get(i);
207                 if (binder == notRespondingClientInfo.client.asBinder()) {
208                     mClientsNotResponding.remove(i);
209                     break;
210                 }
211             }
212         }
213         if (CarWatchdogService.DEBUG) {
214             Slogf.d(CarWatchdogService.TAG, "Unregistered client: %s", clientInfo);
215         }
216     }
217 
218     @GuardedBy("mLock")
removeFromClientMapsLocked(IBinder binder)219     private Optional<ClientInfo> removeFromClientMapsLocked(IBinder binder) {
220         for (int timeout : ALL_TIMEOUTS) {
221             ArrayList<ClientInfo> clients = mClientMap.get(timeout);
222             for (int i = 0; i < clients.size(); i++) {
223                 ClientInfo clientInfo = clients.get(i);
224                 if (binder != clientInfo.client.asBinder()) {
225                     continue;
226                 }
227                 clientInfo.unlinkToDeath();
228                 clients.remove(i);
229                 SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
230                 if (pingedClients != null) {
231                     pingedClients.remove(clientInfo.sessionId);
232                 }
233                 return Optional.of(clientInfo);
234             }
235         }
236         return Optional.empty();
237     }
238 
239     /** Tells the handler that the client is alive. */
tellClientAlive(ICarWatchdogServiceCallback client, int sessionId)240     public void tellClientAlive(ICarWatchdogServiceCallback client, int sessionId) {
241         synchronized (mLock) {
242             for (int timeout : ALL_TIMEOUTS) {
243                 if (!mClientCheckInProgress.get(timeout)) {
244                     continue;
245                 }
246                 SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
247                 ClientInfo clientInfo = pingedClients.get(sessionId);
248                 if (clientInfo != null && clientInfo.client.asBinder() == client.asBinder()) {
249                     pingedClients.remove(sessionId);
250                     return;
251                 }
252             }
253         }
254     }
255 
256     /** Updates the user stopped state */
updateUserState(@serIdInt int userId, boolean isStopped)257     public void updateUserState(@UserIdInt int userId, boolean isStopped) {
258         synchronized (mLock) {
259             if (isStopped) {
260                 mStoppedUser.put(userId, true);
261             } else {
262                 mStoppedUser.delete(userId);
263             }
264         }
265     }
266 
267     /** Posts health check message */
postHealthCheckMessage(int sessionId)268     public void postHealthCheckMessage(int sessionId) {
269         mMainHandler.postAtFrontOfQueue(() -> doHealthCheck(sessionId));
270     }
271 
272     /** Returns the registered and alive client count. */
getClientCount(int timeout)273     public int getClientCount(int timeout) {
274         synchronized (mLock) {
275             ArrayList<ClientInfo> clients = mClientMap.get(timeout);
276             return clients != null ? clients.size() : 0;
277         }
278     }
279 
280     /** Resets pinged clients before health checking */
prepareHealthCheck()281     public void prepareHealthCheck() {
282         synchronized (mLock) {
283             for (int timeout : ALL_TIMEOUTS) {
284                 SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
285                 pingedClients.clear();
286             }
287         }
288     }
289 
290     /** Enables/disables the watchdog daemon client health check process. */
controlProcessHealthCheck(boolean enable)291     void controlProcessHealthCheck(boolean enable) {
292         try {
293             mCarWatchdogDaemonHelper.controlProcessHealthCheck(enable);
294         } catch (RemoteException e) {
295             Slogf.w(CarWatchdogService.TAG,
296                     "Cannot enable/disable the car watchdog daemon health check process: %s", e);
297         }
298     }
299 
onClientDeath(ICarWatchdogServiceCallback client, int timeout)300     private void onClientDeath(ICarWatchdogServiceCallback client, int timeout) {
301         synchronized (mLock) {
302             removeClientLocked(client.asBinder(), timeout);
303         }
304     }
305 
doHealthCheck(int sessionId)306     private void doHealthCheck(int sessionId) {
307         // For critical clients, the response status are checked just before reporting to car
308         // watchdog daemon. For moderate and normal clients, the status are checked after allowed
309         // delay per timeout.
310         analyzeClientResponse(TIMEOUT_CRITICAL);
311         reportHealthCheckResult(sessionId);
312         sendPingToClients(TIMEOUT_CRITICAL);
313         sendPingToClientsAndCheck(TIMEOUT_MODERATE);
314         sendPingToClientsAndCheck(TIMEOUT_NORMAL);
315     }
316 
analyzeClientResponse(int timeout)317     private void analyzeClientResponse(int timeout) {
318         // Clients which are not responding are stored in mClientsNotResponding, and will be dumped
319         // and killed at the next response of CarWatchdogService to car watchdog daemon.
320         synchronized (mLock) {
321             SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
322             for (int i = 0; i < pingedClients.size(); i++) {
323                 ClientInfo clientInfo = pingedClients.valueAt(i);
324                 if (mStoppedUser.get(clientInfo.userId)) {
325                     continue;
326                 }
327                 mClientsNotResponding.add(clientInfo);
328                 removeClientLocked(clientInfo.client.asBinder(), timeout);
329             }
330             mClientCheckInProgress.setValueAt(timeout, false);
331         }
332     }
333 
sendPingToClients(int timeout)334     private void sendPingToClients(int timeout) {
335         ArrayList<ClientInfo> clientsToCheck;
336         synchronized (mLock) {
337             SparseArray<ClientInfo> pingedClients = mPingedClientMap.get(timeout);
338             pingedClients.clear();
339             clientsToCheck = new ArrayList<>(mClientMap.get(timeout));
340             for (int i = 0; i < clientsToCheck.size(); i++) {
341                 ClientInfo clientInfo = clientsToCheck.get(i);
342                 if (mStoppedUser.get(clientInfo.userId)) {
343                     continue;
344                 }
345                 int sessionId = getNewSessionId();
346                 clientInfo.sessionId = sessionId;
347                 pingedClients.put(sessionId, clientInfo);
348             }
349             mClientCheckInProgress.setValueAt(timeout, true);
350         }
351 
352         for (int i = 0; i < clientsToCheck.size(); i++) {
353             ClientInfo clientInfo = clientsToCheck.get(i);
354             try {
355                 clientInfo.client.onCheckHealthStatus(clientInfo.sessionId, timeout);
356             } catch (RemoteException e) {
357                 Slogf.w(CarWatchdogService.TAG,
358                         "Sending a ping message to client(pid: %d) failed: %s",
359                         clientInfo.pid, e);
360                 synchronized (mLock) {
361                     mPingedClientMap.get(timeout).remove(clientInfo.sessionId);
362                 }
363             }
364         }
365     }
366 
sendPingToClientsAndCheck(int timeout)367     private void sendPingToClientsAndCheck(int timeout) {
368         synchronized (mLock) {
369             if (mClientCheckInProgress.get(timeout)) {
370                 return;
371             }
372         }
373         sendPingToClients(timeout);
374         mMainHandler.postDelayed(
375                 () -> analyzeClientResponse(timeout), getTimeoutDurationMs(timeout));
376     }
377 
getNewSessionId()378     private int getNewSessionId() {
379         synchronized (mLock) {
380             if (++mLastSessionId <= 0) {
381                 mLastSessionId = 1;
382             }
383             return mLastSessionId;
384         }
385     }
386 
387     @GuardedBy("mLock")
removeClientLocked(IBinder clientBinder, int timeout)388     private void removeClientLocked(IBinder clientBinder, int timeout) {
389         ArrayList<ClientInfo> clients = mClientMap.get(timeout);
390         for (int i = 0; i < clients.size(); i++) {
391             ClientInfo clientInfo = clients.get(i);
392             if (clientBinder == clientInfo.client.asBinder()) {
393                 clients.remove(i);
394                 return;
395             }
396         }
397     }
398 
reportHealthCheckResult(int sessionId)399     private void reportHealthCheckResult(int sessionId) {
400         List<ProcessIdentifier> clientsNotResponding;
401         ArrayList<ClientInfo> clientsToNotify;
402         synchronized (mLock) {
403             clientsNotResponding = toProcessIdentifierList(mClientsNotResponding);
404             clientsToNotify = new ArrayList<>(mClientsNotResponding);
405             mClientsNotResponding.clear();
406         }
407         for (int i = 0; i < clientsToNotify.size(); i++) {
408             ClientInfo clientInfo = clientsToNotify.get(i);
409             try {
410                 clientInfo.client.onPrepareProcessTermination();
411             } catch (RemoteException e) {
412                 Slogf.w(CarWatchdogService.TAG,
413                         "Notifying onPrepareProcessTermination to client(pid: %d) failed: %s",
414                         clientInfo.pid, e);
415             }
416         }
417 
418         try {
419             mCarWatchdogDaemonHelper.tellCarWatchdogServiceAlive(
420                     mWatchdogServiceForSystem, clientsNotResponding, sessionId);
421         } catch (RemoteException | RuntimeException e) {
422             Slogf.w(CarWatchdogService.TAG,
423                     "Cannot respond to car watchdog daemon (sessionId=%d): %s", sessionId, e);
424         }
425     }
426 
427     @NonNull
toProcessIdentifierList( @onNull ArrayList<ClientInfo> clientInfos)428     private List<ProcessIdentifier> toProcessIdentifierList(
429             @NonNull ArrayList<ClientInfo> clientInfos) {
430         List<ProcessIdentifier> processIdentifiers = new ArrayList<>(clientInfos.size());
431         for (int i = 0; i < clientInfos.size(); i++) {
432             ClientInfo clientInfo = clientInfos.get(i);
433             ProcessIdentifier processIdentifier = new ProcessIdentifier();
434             processIdentifier.pid = clientInfo.pid;
435             processIdentifier.startTimeMillis = clientInfo.startTimeMillis;
436             processIdentifiers.add(processIdentifier);
437         }
438         return processIdentifiers;
439     }
440 
timeoutToString(int timeout)441     private String timeoutToString(int timeout) {
442         switch (timeout) {
443             case TIMEOUT_CRITICAL:
444                 return "critical";
445             case TIMEOUT_MODERATE:
446                 return "moderate";
447             case TIMEOUT_NORMAL:
448                 return "normal";
449             default:
450                 Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
451                 return "unknown";
452         }
453     }
454 
getTimeoutDurationMs(int timeout)455     private long getTimeoutDurationMs(int timeout) {
456         if (mOverriddenClientHealthCheckWindowMs != MISSING_INT_PROPERTY_VALUE) {
457             return mOverriddenClientHealthCheckWindowMs;
458         }
459         switch (timeout) {
460             case TIMEOUT_CRITICAL:
461                 return 3000L;
462             case TIMEOUT_MODERATE:
463                 return 5000L;
464             case TIMEOUT_NORMAL:
465                 return 10000L;
466             default:
467                 Slogf.w(CarWatchdogService.TAG, "Unknown timeout value");
468                 return 10000L;
469         }
470     }
471 
472     private final class ClientInfo implements IBinder.DeathRecipient {
473         public final ICarWatchdogServiceCallback client;
474         public final int pid;
475         public final long startTimeMillis;
476         @UserIdInt public final int userId;
477         public final int timeout;
478         public volatile int sessionId;
479 
ClientInfo(ICarWatchdogServiceCallback client, int pid, @UserIdInt int userId, int timeout)480         ClientInfo(ICarWatchdogServiceCallback client, int pid, @UserIdInt int userId,
481                 int timeout) {
482             this.client = client;
483             this.pid = pid;
484             // CarService doesn't have sepolicy access to read per-pid proc files, so it cannot
485             // fetch the pid's actual start time. When a client process registers with
486             // the CarService, it is safe to assume the process is still alive. So, populate
487             // elapsed real time and the consumer (CarServiceHelperService) of this data should
488             // verify that the actual start time is less than the reported start time.
489             this.startTimeMillis = SystemClock.elapsedRealtime();
490             this.userId = userId;
491             this.timeout = timeout;
492         }
493 
494         @Override
binderDied()495         public void binderDied() {
496             Slogf.w(CarWatchdogService.TAG, "Client(pid: %d) died", pid);
497             onClientDeath(client, timeout);
498         }
499 
linkToDeath()500         private void linkToDeath() throws RemoteException {
501             client.asBinder().linkToDeath(this, 0);
502         }
503 
unlinkToDeath()504         private void unlinkToDeath() {
505             client.asBinder().unlinkToDeath(this, 0);
506         }
507 
508         @Override
toString()509         public String toString() {
510             return "ClientInfo{client=" + client + ", pid=" + pid + ", startTimeMillis="
511                     + startTimeMillis + ", userId=" + userId + ", timeout=" + timeout
512                     + ", sessionId=" + sessionId + '}';
513         }
514     }
515 }
516