• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Zygote"
18 #define ATRACE_TAG ATRACE_TAG_DALVIK
19 
20 #include "com_android_internal_os_Zygote.h"
21 
22 #include <async_safe/log.h>
23 
24 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
25 #include <sys/mount.h>
26 #include <linux/fs.h>
27 #include <sys/types.h>
28 #include <dirent.h>
29 
30 #include <algorithm>
31 #include <array>
32 #include <atomic>
33 #include <functional>
34 #include <iterator>
35 #include <list>
36 #include <optional>
37 #include <sstream>
38 #include <string>
39 #include <string_view>
40 #include <unordered_set>
41 
42 #include <android/fdsan.h>
43 #include <arpa/inet.h>
44 #include <fcntl.h>
45 #include <grp.h>
46 #include <inttypes.h>
47 #include <malloc.h>
48 #include <mntent.h>
49 #include <paths.h>
50 #include <signal.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <sys/auxv.h>
54 #include <sys/capability.h>
55 #include <sys/cdefs.h>
56 #include <sys/eventfd.h>
57 #include <sys/personality.h>
58 #include <sys/prctl.h>
59 #include <sys/resource.h>
60 #include <sys/socket.h>
61 #include <sys/stat.h>
62 #include <sys/time.h>
63 #include <sys/types.h>
64 #include <sys/un.h>
65 #include <sys/utsname.h>
66 #include <sys/wait.h>
67 #include <unistd.h>
68 
69 #include <android-base/file.h>
70 #include <android-base/logging.h>
71 #include <android-base/properties.h>
72 #include <android-base/stringprintf.h>
73 #include <android-base/unique_fd.h>
74 #include <bionic/malloc.h>
75 #include <bionic/mte.h>
76 #include <cutils/fs.h>
77 #include <cutils/multiuser.h>
78 #include <cutils/sockets.h>
79 #include <private/android_filesystem_config.h>
80 #include <processgroup/processgroup.h>
81 #include <processgroup/sched_policy.h>
82 #include <seccomp_policy.h>
83 #include <selinux/android.h>
84 #include <stats_socket.h>
85 #include <utils/String8.h>
86 #include <utils/Trace.h>
87 
88 #include <nativehelper/JNIHelp.h>
89 #include <nativehelper/ScopedLocalRef.h>
90 #include <nativehelper/ScopedPrimitiveArray.h>
91 #include <nativehelper/ScopedUtfChars.h>
92 #include "core_jni_helpers.h"
93 #include "fd_utils.h"
94 #include "filesystem_utils.h"
95 
96 #include "nativebridge/native_bridge.h"
97 
98 namespace {
99 
100 // TODO (chriswailes): Add a function to initialize native Zygote data.
101 // TODO (chriswailes): Fix mixed indentation style (2 and 4 spaces).
102 
103 using namespace std::placeholders;
104 
105 using android::String8;
106 using android::base::ReadFileToString;
107 using android::base::StringAppendF;
108 using android::base::StringPrintf;
109 using android::base::WriteStringToFile;
110 using android::base::GetBoolProperty;
111 
112 using android::zygote::ZygoteFailure;
113 
114 // This type is duplicated in fd_utils.h
115 typedef const std::function<void(std::string)>& fail_fn_t;
116 
117 static pid_t gSystemServerPid = 0;
118 
119 static constexpr const char* kVoldAppDataIsolation = "persist.sys.vold_app_data_isolation_enabled";
120 static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
121 static jclass gZygoteClass;
122 static jmethodID gCallPostForkSystemServerHooks;
123 static jmethodID gCallPostForkChildHooks;
124 
125 static constexpr const char* kZygoteInitClassName = "com/android/internal/os/ZygoteInit";
126 static jclass gZygoteInitClass;
127 static jmethodID gGetOrCreateSystemServerClassLoader;
128 
129 static bool gIsSecurityEnforced = true;
130 
131 /**
132  * True if the app process is running in its mount namespace.
133  */
134 static bool gInAppMountNamespace = false;
135 
136 /**
137  * The maximum number of characters (not including a null terminator) that a
138  * process name may contain.
139  */
140 static constexpr size_t MAX_NAME_LENGTH = 15;
141 
142 /**
143  * The file descriptor for the Zygote socket opened by init.
144  */
145 
146 static int gZygoteSocketFD = -1;
147 
148 /**
149  * The file descriptor for the unspecialized app process (USAP) pool socket opened by init.
150  */
151 
152 static int gUsapPoolSocketFD = -1;
153 
154 /**
155  * The number of USAPs currently in this Zygote's pool.
156  */
157 static std::atomic_uint32_t gUsapPoolCount = 0;
158 
159 /**
160  * Event file descriptor used to communicate reaped USAPs to the
161  * ZygoteServer.
162  */
163 static int gUsapPoolEventFD = -1;
164 
165 /**
166  * The socket file descriptor used to send notifications to the
167  * system_server.
168  */
169 static int gSystemServerSocketFd = -1;
170 
171 static constexpr int DEFAULT_DATA_DIR_PERMISSION = 0751;
172 
173 static constexpr const uint64_t UPPER_HALF_WORD_MASK = 0xFFFF'FFFF'0000'0000;
174 static constexpr const uint64_t LOWER_HALF_WORD_MASK = 0x0000'0000'FFFF'FFFF;
175 
176 static constexpr const char* kCurProfileDirPath = "/data/misc/profiles/cur";
177 static constexpr const char* kRefProfileDirPath = "/data/misc/profiles/ref";
178 
179 /**
180  * The maximum value that the gUSAPPoolSizeMax variable may take.  This value
181  * is a mirror of ZygoteServer.USAP_POOL_SIZE_MAX_LIMIT
182  */
183 static constexpr int USAP_POOL_SIZE_MAX_LIMIT = 100;
184 
185 /** The numeric value for the maximum priority a process may possess. */
186 static constexpr int PROCESS_PRIORITY_MAX = -20;
187 
188 /** The numeric value for the minimum priority a process may possess. */
189 static constexpr int PROCESS_PRIORITY_MIN = 19;
190 
191 /** The numeric value for the normal priority a process should have. */
192 static constexpr int PROCESS_PRIORITY_DEFAULT = 0;
193 
194 /** Exponential back off parameters for storage dir check. */
195 static constexpr unsigned int STORAGE_DIR_CHECK_RETRY_MULTIPLIER = 2;
196 static constexpr unsigned int STORAGE_DIR_CHECK_INIT_INTERVAL_US = 50;
197 static constexpr unsigned int STORAGE_DIR_CHECK_MAX_INTERVAL_US = 1000;
198 /**
199  * Lower bound time we allow storage dir check to sleep.
200  * If it exceeds 2s, PROC_START_TIMEOUT_MSG will kill the starting app anyway,
201  * so it's fine to assume max retries is 5 mins.
202  */
203 static constexpr int STORAGE_DIR_CHECK_TIMEOUT_US = 1000 * 1000 * 60 * 5;
204 
/**
 * A helper class containing accounting information for a single USAP.
 *
 * The (pid, read_pipe_fd) pair lives in one lock-free atomic value so that an
 * entry can be inspected and updated from a signal handler.
 */
class UsapTableEntry {
 public:
  struct EntryStorage {
    int32_t pid;
    int32_t read_pipe_fd;

    // Const-qualified so that const values (including INVALID_ENTRY_VALUE)
    // can appear on either side of the comparison.
    bool operator!=(const EntryStorage& other) const {
      return pid != other.pid || read_pipe_fd != other.read_pipe_fd;
    }
  };

 private:
  // Sentinel stored in entries that do not describe a live USAP.
  static constexpr EntryStorage INVALID_ENTRY_VALUE = {-1, -1};

  std::atomic<EntryStorage> mStorage;
  static_assert(decltype(mStorage)::is_always_lock_free);  // Accessed from signal handler.

 public:
  constexpr UsapTableEntry() : mStorage(INVALID_ENTRY_VALUE) {}

  /**
   * If the provided PID matches the one stored in this entry, the entry will
   * be invalidated and the associated file descriptor will be closed.  If the
   * PIDs don't match nothing will happen.
   *
   * @param pid The ID of the process whose entry we want to clear.
   * @return True if the entry was cleared by this call; false otherwise
   */
  bool ClearForPID(int32_t pid) {
    EntryStorage storage = mStorage.load();

    if (storage.pid != pid) {
      return false;
    }

    /*
     * There are three possible outcomes from this compare-and-exchange:
     *   1) It succeeds, in which case we close the FD
     *   2) It fails and the new value is INVALID_ENTRY_VALUE, in which case
     *      the entry has already been cleared.
     *   3) It fails and the new value isn't INVALID_ENTRY_VALUE, in which
     *      case the entry has already been cleared and re-used.
     *
     * In all three cases the goal of the caller has been met, but only in
     * the first case do we need to decrement the pool count.
     */
    if (mStorage.compare_exchange_strong(storage, INVALID_ENTRY_VALUE)) {
      close(storage.read_pipe_fd);
      return true;
    }

    return false;
  }

  /**
   * Invalidates the entry and closes its file descriptor if the entry was
   * valid.
   *
   * The previous value is swapped out in a single atomic step, so only the
   * caller that actually observed the valid value closes the descriptor.
   */
  void Clear() {
    const EntryStorage previous = mStorage.exchange(INVALID_ENTRY_VALUE);

    if (previous != INVALID_ENTRY_VALUE) {
      close(previous.read_pipe_fd);
    }
  }

  /**
   * Marks the entry as invalid without closing the stored file descriptor.
   */
  void Invalidate() {
    mStorage.store(INVALID_ENTRY_VALUE);
  }

  /**
   * @return A copy of the data stored in this entry, or std::nullopt if the
   * entry is currently invalid.
   */
  std::optional<EntryStorage> GetValues() {
    const EntryStorage storage = mStorage.load();

    if (storage != INVALID_ENTRY_VALUE) {
      return storage;
    }

    return std::nullopt;
  }

  /**
   * Sets the entry to the given values if it is currently invalid.
   *
   * @param pid  The process ID for the new entry.
   * @param read_pipe_fd  The read end of the USAP control pipe for this
   * process.
   * @return True if the entry was set; false otherwise.
   */
  bool SetIfInvalid(int32_t pid, int32_t read_pipe_fd) {
    const EntryStorage new_value_storage = {pid, read_pipe_fd};
    EntryStorage expected = INVALID_ENTRY_VALUE;

    return mStorage.compare_exchange_strong(expected, new_value_storage);
  }
};
308 
/**
 * A table containing information about the USAPs currently in the pool.
 *
 * Multiple threads may be attempting to modify the table, either from the
 * signal handler or from the ZygoteServer poll loop.  Atomic loads/stores in
 * the UsapTableEntry class prevent data races during these concurrent
 * operations.
 */
317 static std::array<UsapTableEntry, USAP_POOL_SIZE_MAX_LIMIT> gUsapTable;
318 
319 /**
320  * The list of open zygote file descriptors.
321  */
322 static FileDescriptorTable* gOpenFdTable = nullptr;
323 
// External storage mount modes.
// Must match values in com.android.internal.os.Zygote.
// The values should be consistent with IVold.aidl
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,
  MOUNT_EXTERNAL_DEFAULT = 1,
  MOUNT_EXTERNAL_INSTALLER = 2,
  MOUNT_EXTERNAL_PASS_THROUGH = 3,
  MOUNT_EXTERNAL_ANDROID_WRITABLE = 4,
  // Number of modes above; not itself a mode.
  MOUNT_EXTERNAL_COUNT = 5,
};
334 
// Bit flags passed down from the Java side.
// Must match values in com.android.internal.os.Zygote.
enum RuntimeFlags : uint32_t {
  DEBUG_ENABLE_JDWP = 1 << 0,
  PROFILE_FROM_SHELL = 1 << 15,

  // Two-bit memory tag level field occupying bits 19 and 20.
  MEMORY_TAG_LEVEL_MASK = 3 << 19,
  MEMORY_TAG_LEVEL_TBI = 1 << 19,
  MEMORY_TAG_LEVEL_ASYNC = 2 << 19,
  MEMORY_TAG_LEVEL_SYNC = 3 << 19,

  // Two-bit GWP-ASan level field occupying bits 21 and 22.
  GWP_ASAN_LEVEL_MASK = 3 << 21,
  GWP_ASAN_LEVEL_NEVER = 0 << 21,
  GWP_ASAN_LEVEL_LOTTERY = 1 << 21,
  GWP_ASAN_LEVEL_ALWAYS = 2 << 21,

  NATIVE_HEAP_ZERO_INIT = 1 << 23,
  PROFILEABLE = 1 << 24,
};
350 
// Type tags carried in the header of UnsolicitedZygoteMessageSigChld.
enum UnsolicitedZygoteMessageTypes : uint32_t {
  UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED = 0,
  UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD = 1,
};
355 
356 struct UnsolicitedZygoteMessageSigChld {
357     struct {
358         UnsolicitedZygoteMessageTypes type;
359     } header;
360     struct {
361         pid_t pid;
362         uid_t uid;
363         int status;
364     } payload;
365 };
366 
367 // Keep sync with services/core/java/com/android/server/am/ProcessList.java
368 static constexpr struct sockaddr_un kSystemServerSockAddr =
369         {.sun_family = AF_LOCAL, .sun_path = "/data/system/unsolzygotesocket"};
370 
371 // Forward declaration so we don't have to move the signal handler.
372 static bool RemoveUsapTableEntry(pid_t usap_pid);
373 
RuntimeAbort(JNIEnv * env,int line,const char * msg)374 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
375   std::ostringstream oss;
376   oss << __FILE__ << ":" << line << ": " << msg;
377   env->FatalError(oss.str().c_str());
378 }
379 
380 // Create the socket which is going to be used to send unsolicited message
381 // to system_server, the socket will be closed post forking a child process.
382 // It's expected to be called at each zygote's initialization.
initUnsolSocketToSystemServer()383 static void initUnsolSocketToSystemServer() {
384     gSystemServerSocketFd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_NONBLOCK, 0);
385     if (gSystemServerSocketFd >= 0) {
386         ALOGV("Zygote:systemServerSocketFD = %d", gSystemServerSocketFd);
387     } else {
388         ALOGE("Unable to create socket file descriptor to connect to system_server");
389     }
390 }
391 
sendSigChildStatus(const pid_t pid,const uid_t uid,const int status)392 static void sendSigChildStatus(const pid_t pid, const uid_t uid, const int status) {
393     int socketFd = gSystemServerSocketFd;
394     if (socketFd >= 0) {
395         // fill the message buffer
396         struct UnsolicitedZygoteMessageSigChld data =
397                 {.header = {.type = UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD},
398                  .payload = {.pid = pid, .uid = uid, .status = status}};
399         if (TEMP_FAILURE_RETRY(
400                     sendto(socketFd, &data, sizeof(data), 0,
401                            reinterpret_cast<const struct sockaddr*>(&kSystemServerSockAddr),
402                            sizeof(kSystemServerSockAddr))) == -1) {
403             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
404                                   "Zygote failed to write to system_server FD: %s",
405                                   strerror(errno));
406         }
407     }
408 }
409 
410 // This signal handler is for zygote mode, since the zygote must reap its children
SigChldHandler(int,siginfo_t * info,void *)411 static void SigChldHandler(int /*signal_number*/, siginfo_t* info, void* /*ucontext*/) {
412     pid_t pid;
413     int status;
414     int64_t usaps_removed = 0;
415 
416     // It's necessary to save and restore the errno during this function.
417     // Since errno is stored per thread, changing it here modifies the errno
418     // on the thread on which this signal handler executes. If a signal occurs
419     // between a call and an errno check, it's possible to get the errno set
420     // here.
421     // See b/23572286 for extra information.
422     int saved_errno = errno;
423 
424     while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
425         // Notify system_server that we received a SIGCHLD
426         sendSigChildStatus(pid, info->si_uid, status);
427         // Log process-death status that we care about.
428         if (WIFEXITED(status)) {
429             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG, "Process %d exited cleanly (%d)", pid,
430                                   WEXITSTATUS(status));
431 
432             // Check to see if the PID is in the USAP pool and remove it if it is.
433             if (RemoveUsapTableEntry(pid)) {
434                 ++usaps_removed;
435             }
436         } else if (WIFSIGNALED(status)) {
437             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG,
438                                   "Process %d exited due to signal %d (%s)%s", pid,
439                                   WTERMSIG(status), strsignal(WTERMSIG(status)),
440                                   WCOREDUMP(status) ? "; core dumped" : "");
441 
442             // If the process exited due to a signal other than SIGTERM, check to see
443             // if the PID is in the USAP pool and remove it if it is.  If the process
444             // was closed by the Zygote using SIGTERM then the USAP pool entry will
445             // have already been removed (see nativeEmptyUsapPool()).
446             if (WTERMSIG(status) != SIGTERM && RemoveUsapTableEntry(pid)) {
447                 ++usaps_removed;
448             }
449         }
450 
451         // If the just-crashed process is the system_server, bring down zygote
452         // so that it is restarted by init and system server will be restarted
453         // from there.
454         if (pid == gSystemServerPid) {
455             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
456                                   "Exit zygote because system server (pid %d) has terminated", pid);
457             kill(getpid(), SIGKILL);
458         }
459     }
460 
461     // Note that we shouldn't consider ECHILD an error because
462     // the secondary zygote might have no children left to wait for.
463     if (pid < 0 && errno != ECHILD) {
464         async_safe_format_log(ANDROID_LOG_WARN, LOG_TAG, "Zygote SIGCHLD error in waitpid: %s",
465                               strerror(errno));
466     }
467 
468     if (usaps_removed > 0) {
469         if (TEMP_FAILURE_RETRY(write(gUsapPoolEventFD, &usaps_removed, sizeof(usaps_removed))) ==
470             -1) {
471             // If this write fails something went terribly wrong.  We will now kill
472             // the zygote and let the system bring it back up.
473             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
474                                   "Zygote failed to write to USAP pool event FD: %s",
475                                   strerror(errno));
476             kill(getpid(), SIGKILL);
477         }
478     }
479 
480     errno = saved_errno;
481 }
482 
483 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
484 // configured very late, because earlier in the runtime we may fork() and
485 // exec() other processes, and we want to waitpid() for those rather than
486 // have them be harvested immediately.
487 //
488 // Ignore SIGHUP because all processes forked by the zygote are in the same
489 // process group as the zygote and we don't want to be notified if we become
490 // an orphaned group and have one or more stopped processes. This is not a
491 // theoretical concern :
492 // - we can become an orphaned group if one of our direct descendants forks
493 //   and is subsequently killed before its children.
494 // - crash_dump routinely STOPs the process it's tracing.
495 //
496 // See issues b/71965619 and b/25567761 for further details.
497 //
498 // This ends up being called repeatedly before each fork(), but there's
499 // no real harm in that.
SetSignalHandlers()500 static void SetSignalHandlers() {
501     struct sigaction sig_chld = {.sa_flags = SA_SIGINFO, .sa_sigaction = SigChldHandler};
502 
503     if (sigaction(SIGCHLD, &sig_chld, nullptr) < 0) {
504         ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
505     }
506 
507   struct sigaction sig_hup = {};
508   sig_hup.sa_handler = SIG_IGN;
509   if (sigaction(SIGHUP, &sig_hup, nullptr) < 0) {
510     ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
511   }
512 }
513 
514 // Sets the SIGCHLD handler back to default behavior in zygote children.
UnsetChldSignalHandler()515 static void UnsetChldSignalHandler() {
516   struct sigaction sa;
517   memset(&sa, 0, sizeof(sa));
518   sa.sa_handler = SIG_DFL;
519 
520   if (sigaction(SIGCHLD, &sa, nullptr) < 0) {
521     ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
522   }
523 }
524 
525 // Calls POSIX setgroups() using the int[] object as an argument.
526 // A nullptr argument is tolerated.
SetGids(JNIEnv * env,jintArray managed_gids,jboolean is_child_zygote,fail_fn_t fail_fn)527 static void SetGids(JNIEnv* env, jintArray managed_gids, jboolean is_child_zygote,
528                     fail_fn_t fail_fn) {
529   if (managed_gids == nullptr) {
530     if (is_child_zygote) {
531       // For child zygotes like webview and app zygote, we want to clear out
532       // any supplemental groups the parent zygote had.
533       if (setgroups(0, NULL) == -1) {
534         fail_fn(CREATE_ERROR("Failed to remove supplementary groups for child zygote"));
535       }
536     }
537     return;
538   }
539 
540   ScopedIntArrayRO gids(env, managed_gids);
541   if (gids.get() == nullptr) {
542     fail_fn(CREATE_ERROR("Getting gids int array failed"));
543   }
544 
545   if (setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])) == -1) {
546     fail_fn(CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size()));
547   }
548 }
549 
ensureInAppMountNamespace(fail_fn_t fail_fn)550 static void ensureInAppMountNamespace(fail_fn_t fail_fn) {
551   if (gInAppMountNamespace) {
552     // In app mount namespace already
553     return;
554   }
555   if (unshare(CLONE_NEWNS) == -1) {
556     fail_fn(CREATE_ERROR("Failed to unshare(): %s", strerror(errno)));
557   }
558   gInAppMountNamespace = true;
559 }
560 
561 // Sets the resource limits via setrlimit(2) for the values in the
562 // two-dimensional array of integers that's passed in. The second dimension
563 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). nullptr is
564 // treated as an empty array.
SetRLimits(JNIEnv * env,jobjectArray managed_rlimits,fail_fn_t fail_fn)565 static void SetRLimits(JNIEnv* env, jobjectArray managed_rlimits, fail_fn_t fail_fn) {
566   if (managed_rlimits == nullptr) {
567     return;
568   }
569 
570   rlimit rlim;
571   memset(&rlim, 0, sizeof(rlim));
572 
573   for (int i = 0; i < env->GetArrayLength(managed_rlimits); ++i) {
574     ScopedLocalRef<jobject>
575         managed_rlimit_object(env, env->GetObjectArrayElement(managed_rlimits, i));
576     ScopedIntArrayRO rlimit_handle(env, reinterpret_cast<jintArray>(managed_rlimit_object.get()));
577 
578     if (rlimit_handle.size() != 3) {
579       fail_fn(CREATE_ERROR("rlimits array must have a second dimension of size 3"));
580     }
581 
582     rlim.rlim_cur = rlimit_handle[1];
583     rlim.rlim_max = rlimit_handle[2];
584 
585     if (setrlimit(rlimit_handle[0], &rlim) == -1) {
586       fail_fn(CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed",
587                            rlimit_handle[0], rlim.rlim_cur, rlim.rlim_max));
588     }
589   }
590 }
591 
EnableDebugger()592 static void EnableDebugger() {
593   // To let a non-privileged gdbserver attach to this
594   // process, we must set our dumpable flag.
595   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
596     ALOGE("prctl(PR_SET_DUMPABLE) failed");
597   }
598 
599   // A non-privileged native debugger should be able to attach to the debuggable app, even if Yama
600   // is enabled (see kernel/Documentation/security/Yama.txt).
601   if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
602     // if Yama is off prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
603     // case since it's expected behaviour.
604     if (errno != EINVAL) {
605       ALOGE("prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed");
606     }
607   }
608 
609   // Set the core dump size to zero unless wanted (see also coredump_setup in build/envsetup.sh).
610   if (!GetBoolProperty("persist.zygote.core_dump", false)) {
611     // Set the soft limit on core dump size to 0 without changing the hard limit.
612     rlimit rl;
613     if (getrlimit(RLIMIT_CORE, &rl) == -1) {
614       ALOGE("getrlimit(RLIMIT_CORE) failed");
615     } else {
616       rl.rlim_cur = 0;
617       if (setrlimit(RLIMIT_CORE, &rl) == -1) {
618         ALOGE("setrlimit(RLIMIT_CORE) failed");
619       }
620     }
621   }
622 }
623 
PreApplicationInit()624 static void PreApplicationInit() {
625   // The child process sets this to indicate it's not the zygote.
626   android_mallopt(M_SET_ZYGOTE_CHILD, nullptr, 0);
627 
628   // Set the jemalloc decay time to 1.
629   mallopt(M_DECAY_TIME, 1);
630 }
631 
SetUpSeccompFilter(uid_t uid,bool is_child_zygote)632 static void SetUpSeccompFilter(uid_t uid, bool is_child_zygote) {
633   if (!gIsSecurityEnforced) {
634     ALOGI("seccomp disabled by setenforce 0");
635     return;
636   }
637 
638   // Apply system or app filter based on uid.
639   if (uid >= AID_APP_START) {
640     if (is_child_zygote) {
641       set_app_zygote_seccomp_filter();
642     } else {
643       set_app_seccomp_filter();
644     }
645   } else {
646     set_system_seccomp_filter();
647   }
648 }
649 
EnableKeepCapabilities(fail_fn_t fail_fn)650 static void EnableKeepCapabilities(fail_fn_t fail_fn) {
651   if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) {
652     fail_fn(CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno)));
653   }
654 }
655 
DropCapabilitiesBoundingSet(fail_fn_t fail_fn)656 static void DropCapabilitiesBoundingSet(fail_fn_t fail_fn) {
657   for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {;
658     if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) == -1) {
659       if (errno == EINVAL) {
660         ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
661               "your kernel is compiled with file capabilities support");
662       } else {
663         fail_fn(CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno)));
664       }
665     }
666   }
667 }
668 
SetInheritable(uint64_t inheritable,fail_fn_t fail_fn)669 static void SetInheritable(uint64_t inheritable, fail_fn_t fail_fn) {
670   __user_cap_header_struct capheader;
671   memset(&capheader, 0, sizeof(capheader));
672   capheader.version = _LINUX_CAPABILITY_VERSION_3;
673   capheader.pid = 0;
674 
675   __user_cap_data_struct capdata[2];
676   if (capget(&capheader, &capdata[0]) == -1) {
677     fail_fn(CREATE_ERROR("capget failed: %s", strerror(errno)));
678   }
679 
680   capdata[0].inheritable = inheritable;
681   capdata[1].inheritable = inheritable >> 32;
682 
683   if (capset(&capheader, &capdata[0]) == -1) {
684     fail_fn(CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno)));
685   }
686 }
687 
SetCapabilities(uint64_t permitted,uint64_t effective,uint64_t inheritable,fail_fn_t fail_fn)688 static void SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
689                             fail_fn_t fail_fn) {
690   __user_cap_header_struct capheader;
691   memset(&capheader, 0, sizeof(capheader));
692   capheader.version = _LINUX_CAPABILITY_VERSION_3;
693   capheader.pid = 0;
694 
695   __user_cap_data_struct capdata[2];
696   memset(&capdata, 0, sizeof(capdata));
697   capdata[0].effective = effective;
698   capdata[1].effective = effective >> 32;
699   capdata[0].permitted = permitted;
700   capdata[1].permitted = permitted >> 32;
701   capdata[0].inheritable = inheritable;
702   capdata[1].inheritable = inheritable >> 32;
703 
704   if (capset(&capheader, &capdata[0]) == -1) {
705     fail_fn(CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
706                          "failed: %s", permitted, effective, inheritable, strerror(errno)));
707   }
708 }
709 
SetSchedulerPolicy(fail_fn_t fail_fn,bool is_top_app)710 static void SetSchedulerPolicy(fail_fn_t fail_fn, bool is_top_app) {
711   SchedPolicy policy = is_top_app ? SP_TOP_APP : SP_DEFAULT;
712 
713   if (is_top_app && cpusets_enabled()) {
714     errno = -set_cpuset_policy(0, policy);
715     if (errno != 0) {
716       fail_fn(CREATE_ERROR("set_cpuset_policy(0, %d) failed: %s", policy, strerror(errno)));
717     }
718   }
719 
720   errno = -set_sched_policy(0, policy);
721   if (errno != 0) {
722     fail_fn(CREATE_ERROR("set_sched_policy(0, %d) failed: %s", policy, strerror(errno)));
723   }
724 
725   // We are going to lose the permission to set scheduler policy during the specialization, so make
726   // sure that we don't cache the fd of cgroup path that may cause sepolicy violation by writing
727   // value to the cached fd directly when creating new thread.
728   DropTaskProfilesResourceCaching();
729 }
730 
UnmountTree(const char * path)731 static int UnmountTree(const char* path) {
732   ATRACE_CALL();
733 
734   size_t path_len = strlen(path);
735 
736   FILE* fp = setmntent("/proc/mounts", "r");
737   if (fp == nullptr) {
738     ALOGE("Error opening /proc/mounts: %s", strerror(errno));
739     return -errno;
740   }
741 
742   // Some volumes can be stacked on each other, so force unmount in
743   // reverse order to give us the best chance of success.
744   std::list<std::string> to_unmount;
745   mntent* mentry;
746   while ((mentry = getmntent(fp)) != nullptr) {
747     if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
748       to_unmount.push_front(std::string(mentry->mnt_dir));
749     }
750   }
751   endmntent(fp);
752 
753   for (const auto& path : to_unmount) {
754     if (umount2(path.c_str(), MNT_DETACH)) {
755       ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
756     }
757   }
758   return 0;
759 }
760 
PrepareDir(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)761 static void PrepareDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
762                       fail_fn_t fail_fn) {
763   if (fs_prepare_dir(dir.c_str(), mode, uid, gid) != 0) {
764     fail_fn(CREATE_ERROR("fs_prepare_dir failed on %s: %s",
765                          dir.c_str(), strerror(errno)));
766   }
767 }
768 
PrepareDirIfNotPresent(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)769 static void PrepareDirIfNotPresent(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
770                       fail_fn_t fail_fn) {
771   struct stat sb;
772   if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &sb)) != -1) {
773     // Directory exists already
774     return;
775   }
776   PrepareDir(dir, mode, uid, gid, fail_fn);
777 }
778 
// Recursively bind mounts source_dir onto target_dir, retrying on EINTR.
// Returns true on success and false on failure, leaving errno set by mount().
static bool BindMount(const std::string& source_dir, const std::string& target_dir) {
  const int rc = TEMP_FAILURE_RETRY(mount(source_dir.c_str(), target_dir.c_str(), nullptr,
                                          MS_BIND | MS_REC, nullptr));
  return rc != -1;
}
783 
BindMount(const std::string & source_dir,const std::string & target_dir,fail_fn_t fail_fn)784 static void BindMount(const std::string& source_dir, const std::string& target_dir,
785                       fail_fn_t fail_fn) {
786   if (!BindMount(source_dir, target_dir)) {
787     fail_fn(CREATE_ERROR("Failed to mount %s to %s: %s",
788                          source_dir.c_str(), target_dir.c_str(), strerror(errno)));
789   }
790 }
791 
MountAppDataTmpFs(const std::string & target_dir,fail_fn_t fail_fn)792 static void MountAppDataTmpFs(const std::string& target_dir,
793                       fail_fn_t fail_fn) {
794   if (TEMP_FAILURE_RETRY(mount("tmpfs", target_dir.c_str(), "tmpfs",
795                                MS_NOSUID | MS_NODEV | MS_NOEXEC, "uid=0,gid=0,mode=0751")) == -1) {
796     fail_fn(CREATE_ERROR("Failed to mount tmpfs to %s: %s",
797                          target_dir.c_str(), strerror(errno)));
798   }
799 }
800 
801 // Create a private mount namespace and bind mount appropriate emulated
802 // storage for the given user.
MountEmulatedStorage(uid_t uid,jint mount_mode,bool force_mount_namespace,fail_fn_t fail_fn)803 static void MountEmulatedStorage(uid_t uid, jint mount_mode,
804         bool force_mount_namespace,
805         fail_fn_t fail_fn) {
806   // See storage config details at http://source.android.com/tech/storage/
807   ATRACE_CALL();
808 
809   if (mount_mode < 0 || mount_mode >= MOUNT_EXTERNAL_COUNT) {
810     fail_fn(CREATE_ERROR("Unknown mount_mode: %d", mount_mode));
811   }
812 
813   if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
814     // Valid default of no storage visible
815     return;
816   }
817 
818   // Create a second private mount namespace for our process
819   ensureInAppMountNamespace(fail_fn);
820 
821   // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
822   if (mount_mode == MOUNT_EXTERNAL_NONE) {
823     return;
824   }
825 
826   const userid_t user_id = multiuser_get_user_id(uid);
827   const std::string user_source = StringPrintf("/mnt/user/%d", user_id);
828   // Shell is neither AID_ROOT nor AID_EVERYBODY. Since it equally needs 'execute' access to
829   // /mnt/user/0 to 'adb shell ls /sdcard' for instance, we set the uid bit of /mnt/user/0 to
830   // AID_SHELL. This gives shell access along with apps running as group everybody (user 0 apps)
831   // These bits should be consistent with what is set in vold in
832   // Utils#MountUserFuse on FUSE volume mount
833   PrepareDir(user_source, 0710, user_id ? AID_ROOT : AID_SHELL,
834              multiuser_get_uid(user_id, AID_EVERYBODY), fail_fn);
835 
836   bool isAppDataIsolationEnabled = GetBoolProperty(kVoldAppDataIsolation, false);
837 
838   if (mount_mode == MOUNT_EXTERNAL_PASS_THROUGH) {
839       const std::string pass_through_source = StringPrintf("/mnt/pass_through/%d", user_id);
840       PrepareDir(pass_through_source, 0710, AID_ROOT, AID_MEDIA_RW, fail_fn);
841       BindMount(pass_through_source, "/storage", fail_fn);
842   } else if (mount_mode == MOUNT_EXTERNAL_INSTALLER) {
843       const std::string installer_source = StringPrintf("/mnt/installer/%d", user_id);
844       BindMount(installer_source, "/storage", fail_fn);
845   } else if (isAppDataIsolationEnabled && mount_mode == MOUNT_EXTERNAL_ANDROID_WRITABLE) {
846       const std::string writable_source = StringPrintf("/mnt/androidwritable/%d", user_id);
847       BindMount(writable_source, "/storage", fail_fn);
848   } else {
849       BindMount(user_source, "/storage", fail_fn);
850   }
851 }
852 
// Returns true only on 32-bit ARM builds running a kernel older than 3.4.
// Returns false when the kernel release cannot be queried or parsed.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major;
    int minor;
    const bool parsed = sscanf(kernel_info.release, "%d.%d", &major, &minor) == 2;
    if (!parsed) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
872 
873 // Utility to close down the Zygote socket file descriptors while
874 // the child is still running as root with Zygote's privileges.  Each
875 // descriptor (if any) is closed via dup3(), replacing it with a valid
876 // (open) descriptor to /dev/null.
877 
DetachDescriptors(JNIEnv * env,const std::vector<int> & fds_to_close,fail_fn_t fail_fn)878 static void DetachDescriptors(JNIEnv* env,
879                               const std::vector<int>& fds_to_close,
880                               fail_fn_t fail_fn) {
881 
882   if (fds_to_close.size() > 0) {
883     android::base::unique_fd devnull_fd(open("/dev/null", O_RDWR | O_CLOEXEC));
884     if (devnull_fd == -1) {
885       fail_fn(std::string("Failed to open /dev/null: ").append(strerror(errno)));
886     }
887 
888     for (int fd : fds_to_close) {
889       ALOGV("Switching descriptor %d to /dev/null", fd);
890       if (TEMP_FAILURE_RETRY(dup3(devnull_fd, fd, O_CLOEXEC)) == -1) {
891         fail_fn(StringPrintf("Failed dup3() on descriptor %d: %s", fd, strerror(errno)));
892       }
893     }
894   }
895 }
896 
SetThreadName(const std::string & thread_name)897 void SetThreadName(const std::string& thread_name) {
898   bool hasAt = false;
899   bool hasDot = false;
900 
901   for (const char str_el : thread_name) {
902     if (str_el == '.') {
903       hasDot = true;
904     } else if (str_el == '@') {
905       hasAt = true;
906     }
907   }
908 
909   const char* name_start_ptr = thread_name.c_str();
910   if (thread_name.length() >= MAX_NAME_LENGTH && !hasAt && hasDot) {
911     name_start_ptr += thread_name.length() - MAX_NAME_LENGTH;
912   }
913 
914   // pthread_setname_np fails rather than truncating long strings.
915   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
916   strlcpy(buf, name_start_ptr, sizeof(buf) - 1);
917   errno = pthread_setname_np(pthread_self(), buf);
918   if (errno != 0) {
919     ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
920   }
921   // Update base::logging default tag.
922   android::base::SetDefaultTag(buf);
923 }
924 
925 /**
926  * A helper method for converting managed strings to native strings.  A fatal
927  * error is generated if a problem is encountered in extracting a non-null
928  * string.
929  *
930  * @param env  Managed runtime environment
931  * @param process_name  A native representation of the process name
932  * @param managed_process_name  A managed representation of the process name
933  * @param managed_string  The managed string to extract
934  *
935  * @return An empty option if the managed string is null.  A optional-wrapped
936  * string otherwise.
937  */
ExtractJString(JNIEnv * env,const char * process_name,jstring managed_process_name,jstring managed_string)938 static std::optional<std::string> ExtractJString(JNIEnv* env,
939                                                  const char* process_name,
940                                                  jstring managed_process_name,
941                                                  jstring managed_string) {
942   if (managed_string == nullptr) {
943     return std::nullopt;
944   } else {
945     ScopedUtfChars scoped_string_chars(env, managed_string);
946 
947     if (scoped_string_chars.c_str() != nullptr) {
948       return std::optional<std::string>(scoped_string_chars.c_str());
949     } else {
950       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JString.");
951     }
952   }
953 }
954 
955 /**
956  * A helper method for converting managed string arrays to native vectors.  A
957  * fatal error is generated if a problem is encountered in extracting a non-null array.
958  *
959  * @param env  Managed runtime environment
960  * @param process_name  A native representation of the process name
961  * @param managed_process_name  A managed representation of the process name
962  * @param managed_array  The managed integer array to extract
963  *
964  * @return An empty option if the managed array is null.  A optional-wrapped
965  * vector otherwise.
966  */
ExtractJIntArray(JNIEnv * env,const char * process_name,jstring managed_process_name,jintArray managed_array)967 static std::optional<std::vector<int>> ExtractJIntArray(JNIEnv* env,
968                                                         const char* process_name,
969                                                         jstring managed_process_name,
970                                                         jintArray managed_array) {
971   if (managed_array == nullptr) {
972     return std::nullopt;
973   } else {
974     ScopedIntArrayRO managed_array_handle(env, managed_array);
975 
976     if (managed_array_handle.get() != nullptr) {
977       std::vector<int> native_array;
978       native_array.reserve(managed_array_handle.size());
979 
980       for (size_t array_index = 0; array_index < managed_array_handle.size(); ++array_index) {
981         native_array.push_back(managed_array_handle[array_index]);
982       }
983 
984       return std::move(native_array);
985 
986     } else {
987       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JIntArray.");
988     }
989   }
990 }
991 
992 /**
993  * A utility function for blocking signals.
994  *
995  * @param signum  Signal number to block
996  * @param fail_fn  Fatal error reporting function
997  *
998  * @see ZygoteFailure
999  */
BlockSignal(int signum,fail_fn_t fail_fn)1000 static void BlockSignal(int signum, fail_fn_t fail_fn) {
1001   sigset_t sigs;
1002   sigemptyset(&sigs);
1003   sigaddset(&sigs, signum);
1004 
1005   if (sigprocmask(SIG_BLOCK, &sigs, nullptr) == -1) {
1006     fail_fn(CREATE_ERROR("Failed to block signal %s: %s", strsignal(signum), strerror(errno)));
1007   }
1008 }
1009 
1010 
1011 /**
1012  * A utility function for unblocking signals.
1013  *
1014  * @param signum  Signal number to unblock
1015  * @param fail_fn  Fatal error reporting function
1016  *
1017  * @see ZygoteFailure
1018  */
UnblockSignal(int signum,fail_fn_t fail_fn)1019 static void UnblockSignal(int signum, fail_fn_t fail_fn) {
1020   sigset_t sigs;
1021   sigemptyset(&sigs);
1022   sigaddset(&sigs, signum);
1023 
1024   if (sigprocmask(SIG_UNBLOCK, &sigs, nullptr) == -1) {
1025     fail_fn(CREATE_ERROR("Failed to un-block signal %s: %s", strsignal(signum), strerror(errno)));
1026   }
1027 }
1028 
ClearUsapTable()1029 static void ClearUsapTable() {
1030   for (UsapTableEntry& entry : gUsapTable) {
1031     entry.Clear();
1032   }
1033 
1034   gUsapPoolCount = 0;
1035 }
1036 
1037 // Create an app data directory over tmpfs overlayed CE / DE storage, and bind mount it
1038 // from the actual app data directory in data mirror.
createAndMountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn,bool call_fail_fn)1039 static bool createAndMountAppData(std::string_view package_name,
1040     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1041     std::string_view actual_data_path, fail_fn_t fail_fn, bool call_fail_fn) {
1042 
1043   char mirrorAppDataPath[PATH_MAX];
1044   char actualAppDataPath[PATH_MAX];
1045   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1046       mirror_pkg_dir_name.data());
1047   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1048 
1049   PrepareDir(actualAppDataPath, 0700, AID_ROOT, AID_ROOT, fail_fn);
1050 
1051   // Bind mount from original app data directory in mirror.
1052   if (call_fail_fn) {
1053     BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1054   } else if(!BindMount(mirrorAppDataPath, actualAppDataPath)) {
1055     ALOGW("Failed to mount %s to %s: %s",
1056           mirrorAppDataPath, actualAppDataPath, strerror(errno));
1057     return false;
1058   }
1059   return true;
1060 }
1061 
1062 // There is an app data directory over tmpfs overlaid CE / DE storage
1063 // bind mount it from the actual app data directory in data mirror.
mountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn)1064 static void mountAppData(std::string_view package_name,
1065     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1066     std::string_view actual_data_path, fail_fn_t fail_fn) {
1067 
1068   char mirrorAppDataPath[PATH_MAX];
1069   char actualAppDataPath[PATH_MAX];
1070   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1071       mirror_pkg_dir_name.data());
1072   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1073 
1074   // Bind mount from original app data directory in mirror.
1075   BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1076 }
1077 
1078 // Get the directory name stored in /data/data. If device is unlocked it should be the same as
1079 // package name, otherwise it will be an encrypted name but with same inode number.
getAppDataDirName(std::string_view parent_path,std::string_view package_name,long long ce_data_inode,fail_fn_t fail_fn)1080 static std::string getAppDataDirName(std::string_view parent_path, std::string_view package_name,
1081       long long ce_data_inode, fail_fn_t fail_fn) {
1082   // Check if directory exists
1083   char tmpPath[PATH_MAX];
1084   snprintf(tmpPath, PATH_MAX, "%s/%s", parent_path.data(), package_name.data());
1085   struct stat s;
1086   int err = stat(tmpPath, &s);
1087   if (err == 0) {
1088     // Directory exists, so return the directory name
1089     return package_name.data();
1090   } else {
1091     if (errno != ENOENT) {
1092       fail_fn(CREATE_ERROR("Unexpected error in getAppDataDirName: %s", strerror(errno)));
1093       return nullptr;
1094     }
1095     {
1096       // Directory doesn't exist, try to search the name from inode
1097       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1098       if (dir == nullptr) {
1099         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1100       }
1101       struct dirent* ent;
1102       while ((ent = readdir(dir.get()))) {
1103         if (ent->d_ino == ce_data_inode) {
1104           return ent->d_name;
1105         }
1106       }
1107     }
1108 
1109     // Fallback due to b/145989852, ce_data_inode stored in package manager may be corrupted
1110     // if ino_t is 32 bits.
1111     ino_t fixed_ce_data_inode = 0;
1112     if ((ce_data_inode & UPPER_HALF_WORD_MASK) == UPPER_HALF_WORD_MASK) {
1113       fixed_ce_data_inode = ce_data_inode & LOWER_HALF_WORD_MASK;
1114     } else if ((ce_data_inode & LOWER_HALF_WORD_MASK) == LOWER_HALF_WORD_MASK) {
1115       fixed_ce_data_inode = ((ce_data_inode >> 32) & LOWER_HALF_WORD_MASK);
1116     }
1117     if (fixed_ce_data_inode != 0) {
1118       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1119       if (dir == nullptr) {
1120         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1121       }
1122       struct dirent* ent;
1123       while ((ent = readdir(dir.get()))) {
1124         if (ent->d_ino == fixed_ce_data_inode) {
1125           long long d_ino = ent->d_ino;
1126           ALOGW("Fallback success inode %lld -> %lld", ce_data_inode, d_ino);
1127           return ent->d_name;
1128         }
1129       }
1130     }
1131     // Fallback done
1132 
1133     fail_fn(CREATE_ERROR("Unable to find %s:%lld in %s", package_name.data(),
1134         ce_data_inode, parent_path.data()));
1135     return nullptr;
1136   }
1137 }
1138 
1139 // Isolate app's data directory, by mounting a tmpfs on CE DE storage,
1140 // and create and bind mount app data in related_packages.
isolateAppDataPerPackage(int userId,std::string_view package_name,std::string_view volume_uuid,long long ce_data_inode,std::string_view actualCePath,std::string_view actualDePath,fail_fn_t fail_fn)1141 static void isolateAppDataPerPackage(int userId, std::string_view package_name,
1142     std::string_view volume_uuid, long long ce_data_inode, std::string_view actualCePath,
1143     std::string_view actualDePath, fail_fn_t fail_fn) {
1144 
1145   char mirrorCePath[PATH_MAX];
1146   char mirrorDePath[PATH_MAX];
1147   char mirrorCeParent[PATH_MAX];
1148   snprintf(mirrorCeParent, PATH_MAX, "/data_mirror/data_ce/%s", volume_uuid.data());
1149   snprintf(mirrorCePath, PATH_MAX, "%s/%d", mirrorCeParent, userId);
1150   snprintf(mirrorDePath, PATH_MAX, "/data_mirror/data_de/%s/%d", volume_uuid.data(), userId);
1151 
1152   createAndMountAppData(package_name, package_name, mirrorDePath, actualDePath, fail_fn,
1153                         true /*call_fail_fn*/);
1154 
1155   std::string ce_data_path = getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1156   if (!createAndMountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn,
1157                              false /*call_fail_fn*/)) {
1158     // CE might unlocks and the name is decrypted
1159     // get the name and mount again
1160     ce_data_path=getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1161     mountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn);
1162   }
1163 }
1164 
1165 // Relabel directory
relabelDir(const char * path,security_context_t context,fail_fn_t fail_fn)1166 static void relabelDir(const char* path, security_context_t context, fail_fn_t fail_fn) {
1167   if (setfilecon(path, context) != 0) {
1168     fail_fn(CREATE_ERROR("Failed to setfilecon %s %s", path, strerror(errno)));
1169   }
1170 }
1171 
1172 // Relabel all directories under a path non-recursively.
relabelAllDirs(const char * path,security_context_t context,fail_fn_t fail_fn)1173 static void relabelAllDirs(const char* path, security_context_t context, fail_fn_t fail_fn) {
1174   DIR* dir = opendir(path);
1175   if (dir == nullptr) {
1176     fail_fn(CREATE_ERROR("Failed to opendir %s", path));
1177   }
1178   struct dirent* ent;
1179   while ((ent = readdir(dir))) {
1180     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1181     auto filePath = StringPrintf("%s/%s", path, ent->d_name);
1182     if (ent->d_type == DT_DIR) {
1183       relabelDir(filePath.c_str(), context, fail_fn);
1184     } else if (ent->d_type == DT_LNK) {
1185       if (lsetfilecon(filePath.c_str(), context) != 0) {
1186         fail_fn(CREATE_ERROR("Failed to lsetfilecon %s %s", filePath.c_str(), strerror(errno)));
1187       }
1188     } else {
1189       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, filePath.c_str()));
1190     }
1191   }
1192   closedir(dir);
1193 }
1194 
1195 /**
1196  * Make other apps data directory not visible in CE, DE storage.
1197  *
1198  * Apps without app data isolation can detect if another app is installed on system,
1199  * by "touching" other apps data directory like /data/data/com.whatsapp, if it returns
1200  * "Permission denied" it means apps installed, otherwise it returns "File not found".
1201  * Traditional file permissions or SELinux can only block accessing those directories but
1202  * can't fix fingerprinting like this.
1203  * We fix it by "overlaying" data directory, and only relevant app data packages exists
1204  * in data directories.
1205  *
1206  * Steps:
1207  * 1). Collect a list of all related apps (apps with same uid and allowlisted apps) data info
1208  * (package name, data stored volume uuid, and inode number of its CE data directory)
1209  * 2). Mount tmpfs on /data/data, /data/user(_de) and /mnt/expand, so apps no longer
1210  * able to access apps data directly.
1211  * 3). For each related app, create its app data directory and bind mount the actual content
1212  * from apps data mirror directory. This works on both CE and DE storage, as DE storage
1213  * is always available even storage is FBE locked, while we use inode number to find
1214  * the encrypted DE directory in mirror so we can still bind mount it successfully.
1215  *
1216  * Example:
1217  * 0). Assuming com.android.foo CE data is stored in /data/data and no shared uid
1218  * 1). Mount a tmpfs on /data/data, /data/user, /data/user_de, /mnt/expand
1219  * List = ["com.android.foo", "null" (volume uuid "null"=default),
1220  * 123456 (inode number)]
1221  * 2). On DE storage, we create a directory /data/user_de/0/com.com.android.foo, and bind
1222  * mount (in the app's mount namespace) it from /data_mirror/data_de/0/com.android.foo.
1223  * 3). We do similar for CE storage. But in direct boot mode, as /data_mirror/data_ce/0/ is
1224  * encrypted, we can't find a directory with name com.android.foo on it, so we will
1225  * use the inode number to find the right directory instead, which that directory content will
1226  * be decrypted after storage is decrypted.
1227  *
1228  */
isolateAppData(JNIEnv * env,const std::vector<std::string> & merged_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1229 static void isolateAppData(JNIEnv* env, const std::vector<std::string>& merged_data_info_list,
1230     uid_t uid, const char* process_name,
1231     jstring managed_nice_name, fail_fn_t fail_fn) {
1232 
1233   const userid_t userId = multiuser_get_user_id(uid);
1234 
1235   int size = merged_data_info_list.size();
1236 
1237   // Mount tmpfs on all possible data directories, so app no longer see the original apps data.
1238   char internalCePath[PATH_MAX];
1239   char internalLegacyCePath[PATH_MAX];
1240   char internalDePath[PATH_MAX];
1241   char externalPrivateMountPath[PATH_MAX];
1242 
1243   snprintf(internalCePath, PATH_MAX, "/data/user");
1244   snprintf(internalLegacyCePath, PATH_MAX, "/data/data");
1245   snprintf(internalDePath, PATH_MAX, "/data/user_de");
1246   snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1247 
1248   security_context_t dataDataContext = nullptr;
1249   if (getfilecon(internalDePath, &dataDataContext) < 0) {
1250     fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalDePath,
1251         strerror(errno)));
1252   }
1253 
1254   MountAppDataTmpFs(internalLegacyCePath, fail_fn);
1255   MountAppDataTmpFs(internalCePath, fail_fn);
1256   MountAppDataTmpFs(internalDePath, fail_fn);
1257 
1258   // Mount tmpfs on all external vols DE and CE storage
1259   DIR* dir = opendir(externalPrivateMountPath);
1260   if (dir == nullptr) {
1261     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1262   }
1263   struct dirent* ent;
1264   while ((ent = readdir(dir))) {
1265     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1266     if (ent->d_type != DT_DIR) {
1267       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1268     }
1269     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1270     auto cePath = StringPrintf("%s/user", volPath.c_str());
1271     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1272     MountAppDataTmpFs(cePath.c_str(), fail_fn);
1273     MountAppDataTmpFs(dePath.c_str(), fail_fn);
1274   }
1275   closedir(dir);
1276 
1277   // Prepare default dirs for user 0 as user 0 always exists.
1278   int result = symlink("/data/data", "/data/user/0");
1279   if (result != 0) {
1280     fail_fn(CREATE_ERROR("Failed to create symlink /data/user/0 %s", strerror(errno)));
1281   }
1282   PrepareDirIfNotPresent("/data/user_de/0", DEFAULT_DATA_DIR_PERMISSION,
1283       AID_ROOT, AID_ROOT, fail_fn);
1284 
1285   for (int i = 0; i < size; i += 3) {
1286     std::string const & packageName = merged_data_info_list[i];
1287     std::string const & volUuid  = merged_data_info_list[i + 1];
1288     std::string const & inode = merged_data_info_list[i + 2];
1289 
1290     std::string::size_type sz;
1291     long long ceDataInode = std::stoll(inode, &sz);
1292 
1293     std::string actualCePath, actualDePath;
1294     if (volUuid.compare("null") != 0) {
1295       // Volume that is stored in /mnt/expand
1296       char volPath[PATH_MAX];
1297       char volCePath[PATH_MAX];
1298       char volDePath[PATH_MAX];
1299       char volCeUserPath[PATH_MAX];
1300       char volDeUserPath[PATH_MAX];
1301 
1302       snprintf(volPath, PATH_MAX, "/mnt/expand/%s", volUuid.c_str());
1303       snprintf(volCePath, PATH_MAX, "%s/user", volPath);
1304       snprintf(volDePath, PATH_MAX, "%s/user_de", volPath);
1305       snprintf(volCeUserPath, PATH_MAX, "%s/%d", volCePath, userId);
1306       snprintf(volDeUserPath, PATH_MAX, "%s/%d", volDePath, userId);
1307 
1308       PrepareDirIfNotPresent(volPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1309       PrepareDirIfNotPresent(volCePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1310       PrepareDirIfNotPresent(volDePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1311       PrepareDirIfNotPresent(volCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1312           fail_fn);
1313       PrepareDirIfNotPresent(volDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1314           fail_fn);
1315 
1316       actualCePath = volCeUserPath;
1317       actualDePath = volDeUserPath;
1318     } else {
1319       // Internal volume that stored in /data
1320       char internalCeUserPath[PATH_MAX];
1321       char internalDeUserPath[PATH_MAX];
1322       snprintf(internalCeUserPath, PATH_MAX, "/data/user/%d", userId);
1323       snprintf(internalDeUserPath, PATH_MAX, "/data/user_de/%d", userId);
1324       // If it's not user 0, create /data/user/$USER.
1325       if (userId == 0) {
1326         actualCePath = internalLegacyCePath;
1327       } else {
1328         PrepareDirIfNotPresent(internalCeUserPath, DEFAULT_DATA_DIR_PERMISSION,
1329             AID_ROOT, AID_ROOT, fail_fn);
1330         actualCePath = internalCeUserPath;
1331       }
1332       PrepareDirIfNotPresent(internalDeUserPath, DEFAULT_DATA_DIR_PERMISSION,
1333           AID_ROOT, AID_ROOT, fail_fn);
1334       actualDePath = internalDeUserPath;
1335     }
1336     isolateAppDataPerPackage(userId, packageName, volUuid, ceDataInode,
1337         actualCePath, actualDePath, fail_fn);
1338   }
1339   // We set the label AFTER everything is done, as we are applying
1340   // the file operations on tmpfs. If we set the label when we mount
1341   // tmpfs, SELinux will not happy as we are changing system_data_files.
1342   // Relabel dir under /data/user, including /data/user/0
1343   relabelAllDirs(internalCePath, dataDataContext, fail_fn);
1344 
1345   // Relabel /data/user
1346   relabelDir(internalCePath, dataDataContext, fail_fn);
1347 
1348   // Relabel /data/data
1349   relabelDir(internalLegacyCePath, dataDataContext, fail_fn);
1350 
1351   // Relabel dir under /data/user_de
1352   relabelAllDirs(internalDePath, dataDataContext, fail_fn);
1353 
1354   // Relabel /data/user_de
1355   relabelDir(internalDePath, dataDataContext, fail_fn);
1356 
1357   // Relabel CE and DE dirs under /mnt/expand
1358   dir = opendir(externalPrivateMountPath);
1359   if (dir == nullptr) {
1360     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1361   }
1362   while ((ent = readdir(dir))) {
1363     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1364     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1365     auto cePath = StringPrintf("%s/user", volPath.c_str());
1366     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1367 
1368     relabelAllDirs(cePath.c_str(), dataDataContext, fail_fn);
1369     relabelDir(cePath.c_str(), dataDataContext, fail_fn);
1370     relabelAllDirs(dePath.c_str(), dataDataContext, fail_fn);
1371     relabelDir(dePath.c_str(), dataDataContext, fail_fn);
1372   }
1373   closedir(dir);
1374 
1375   freecon(dataDataContext);
1376 }
1377 
insertPackagesToMergedList(JNIEnv * env,std::vector<std::string> & merged_data_info_list,jobjectArray data_info_list,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1378 static void insertPackagesToMergedList(JNIEnv* env,
1379   std::vector<std::string>& merged_data_info_list,
1380   jobjectArray data_info_list, const char* process_name,
1381   jstring managed_nice_name, fail_fn_t fail_fn) {
1382 
1383   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1384 
1385   int size = (data_info_list != nullptr) ? env->GetArrayLength(data_info_list) : 0;
1386   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1387   if ((size % 3) != 0) {
1388     fail_fn(CREATE_ERROR("Wrong data_info_list size %d", size));
1389   }
1390 
1391   for (int i = 0; i < size; i += 3) {
1392     jstring package_str = (jstring) (env->GetObjectArrayElement(data_info_list, i));
1393     std::string packageName = extract_fn(package_str).value();
1394     merged_data_info_list.push_back(packageName);
1395 
1396     jstring vol_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 1));
1397     std::string volUuid = extract_fn(vol_str).value();
1398     merged_data_info_list.push_back(volUuid);
1399 
1400     jstring inode_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 2));
1401     std::string inode = extract_fn(inode_str).value();
1402     merged_data_info_list.push_back(inode);
1403   }
1404 }
1405 
isolateAppData(JNIEnv * env,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1406 static void isolateAppData(JNIEnv* env, jobjectArray pkg_data_info_list,
1407                            jobjectArray allowlisted_data_info_list, uid_t uid,
1408                            const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1409     std::vector<std::string> merged_data_info_list;
1410     insertPackagesToMergedList(env, merged_data_info_list, pkg_data_info_list, process_name,
1411                                managed_nice_name, fail_fn);
1412     insertPackagesToMergedList(env, merged_data_info_list, allowlisted_data_info_list, process_name,
1413                                managed_nice_name, fail_fn);
1414 
1415     isolateAppData(env, merged_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1416 }
1417 
1418 /**
1419  * Like isolateAppData(), isolate jit profile directories, so apps don't see what
1420  * other apps are installed by reading content inside /data/misc/profiles/cur.
1421  *
1422  * The implementation is similar to isolateAppData(), it creates a tmpfs
1423  * on /data/misc/profiles/cur, and bind mounts related package profiles to it.
1424  */
isolateJitProfile(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1425 static void isolateJitProfile(JNIEnv* env, jobjectArray pkg_data_info_list,
1426     uid_t uid, const char* process_name, jstring managed_nice_name,
1427     fail_fn_t fail_fn) {
1428 
1429   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1430   const userid_t user_id = multiuser_get_user_id(uid);
1431 
1432   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1433   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1434   if ((size % 3) != 0) {
1435     fail_fn(CREATE_ERROR("Wrong pkg_inode_list size %d", size));
1436   }
1437 
1438   // Mount (namespace) tmpfs on profile directory, so apps no longer access
1439   // the original profile directory anymore.
1440   MountAppDataTmpFs(kCurProfileDirPath, fail_fn);
1441   MountAppDataTmpFs(kRefProfileDirPath, fail_fn);
1442 
1443   // Create profile directory for this user.
1444   std::string actualCurUserProfile = StringPrintf("%s/%d", kCurProfileDirPath, user_id);
1445   PrepareDir(actualCurUserProfile, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1446 
1447   for (int i = 0; i < size; i += 3) {
1448     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1449     std::string packageName = extract_fn(package_str).value();
1450 
1451     std::string actualCurPackageProfile = StringPrintf("%s/%s", actualCurUserProfile.c_str(),
1452         packageName.c_str());
1453     std::string mirrorCurPackageProfile = StringPrintf("/data_mirror/cur_profiles/%d/%s",
1454         user_id, packageName.c_str());
1455     std::string actualRefPackageProfile = StringPrintf("%s/%s", kRefProfileDirPath,
1456         packageName.c_str());
1457     std::string mirrorRefPackageProfile = StringPrintf("/data_mirror/ref_profiles/%s",
1458         packageName.c_str());
1459 
1460     if (access(mirrorCurPackageProfile.c_str(), F_OK) != 0) {
1461       ALOGW("Can't access app profile directory: %s", mirrorCurPackageProfile.c_str());
1462       continue;
1463     }
1464     if (access(mirrorRefPackageProfile.c_str(), F_OK) != 0) {
1465       ALOGW("Can't access app profile directory: %s", mirrorRefPackageProfile.c_str());
1466       continue;
1467     }
1468 
1469     PrepareDir(actualCurPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1470     BindMount(mirrorCurPackageProfile, actualCurPackageProfile, fail_fn);
1471     PrepareDir(actualRefPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1472     BindMount(mirrorRefPackageProfile, actualRefPackageProfile, fail_fn);
1473   }
1474 }
1475 
WaitUntilDirReady(const std::string & target,fail_fn_t fail_fn)1476 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn) {
1477   unsigned int sleepIntervalUs = STORAGE_DIR_CHECK_INIT_INTERVAL_US;
1478 
1479   // This is just an approximate value as it doesn't need to be very accurate.
1480   unsigned int sleepTotalUs = 0;
1481 
1482   const char* dir_path = target.c_str();
1483   while (sleepTotalUs < STORAGE_DIR_CHECK_TIMEOUT_US) {
1484     if (access(dir_path, F_OK) == 0) {
1485       return;
1486     }
1487     // Failed, so we add exponential backoff and retry
1488     usleep(sleepIntervalUs);
1489     sleepTotalUs += sleepIntervalUs;
1490     sleepIntervalUs = std::min<unsigned int>(
1491         sleepIntervalUs * STORAGE_DIR_CHECK_RETRY_MULTIPLIER,
1492         STORAGE_DIR_CHECK_MAX_INTERVAL_US);
1493   }
1494   // Last chance and get the latest errno if it fails.
1495   if (access(dir_path, F_OK) == 0) {
1496     return;
1497   }
1498   fail_fn(CREATE_ERROR("Error dir is not ready %s: %s", dir_path, strerror(errno)));
1499 }
1500 
BindMountStorageToLowerFs(const userid_t user_id,const uid_t uid,const char * dir_name,const char * package,fail_fn_t fail_fn)1501 static void BindMountStorageToLowerFs(const userid_t user_id, const uid_t uid,
1502     const char* dir_name, const char* package, fail_fn_t fail_fn) {
1503     bool hasSdcardFs = IsSdcardfsUsed();
1504     std::string source;
1505     if (hasSdcardFs) {
1506         source = StringPrintf("/mnt/runtime/default/emulated/%d/%s/%s", user_id, dir_name, package);
1507     } else {
1508         source = StringPrintf("/mnt/pass_through/%d/emulated/%d/%s/%s", user_id, user_id, dir_name,
1509                               package);
1510     }
1511 
1512   // Directory might be not ready, as prepareStorageDirs() is running asynchronously in ProcessList,
1513   // so wait until dir is created.
1514   WaitUntilDirReady(source, fail_fn);
1515   std::string target = StringPrintf("/storage/emulated/%d/%s/%s", user_id, dir_name, package);
1516 
1517   // As the parent is mounted as tmpfs, we need to create the target dir here.
1518   PrepareDirIfNotPresent(target, 0700, uid, uid, fail_fn);
1519 
1520   if (access(source.c_str(), F_OK) != 0) {
1521     fail_fn(CREATE_ERROR("Error accessing %s: %s", source.c_str(), strerror(errno)));
1522   }
1523   if (access(target.c_str(), F_OK) != 0) {
1524     fail_fn(CREATE_ERROR("Error accessing %s: %s", target.c_str(), strerror(errno)));
1525   }
1526   BindMount(source, target, fail_fn);
1527 }
1528 
1529 // Mount tmpfs on Android/data and Android/obb, then bind mount all app visible package
1530 // directories in data and obb directories.
BindMountStorageDirs(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1531 static void BindMountStorageDirs(JNIEnv* env, jobjectArray pkg_data_info_list,
1532     uid_t uid, const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1533 
1534   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1535   const userid_t user_id = multiuser_get_user_id(uid);
1536 
1537   // Fuse is ready, so we can start using fuse path.
1538   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1539 
1540   // Create tmpfs on Android/obb and Android/data so these 2 dirs won't enter fuse anymore.
1541   std::string androidObbDir = StringPrintf("/storage/emulated/%d/Android/obb", user_id);
1542   MountAppDataTmpFs(androidObbDir, fail_fn);
1543   std::string androidDataDir = StringPrintf("/storage/emulated/%d/Android/data", user_id);
1544   MountAppDataTmpFs(androidDataDir, fail_fn);
1545 
1546   // Bind mount each package obb directory
1547   for (int i = 0; i < size; i += 3) {
1548     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1549     std::string packageName = extract_fn(package_str).value();
1550     BindMountStorageToLowerFs(user_id, uid, "Android/obb", packageName.c_str(), fail_fn);
1551     BindMountStorageToLowerFs(user_id, uid, "Android/data", packageName.c_str(), fail_fn);
1552   }
1553 }
1554 
1555 // Utility routine to specialize a zygote child process.
SpecializeCommon(JNIEnv * env,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities,jint mount_external,jstring managed_se_info,jstring managed_nice_name,bool is_system_server,bool is_child_zygote,jstring managed_instruction_set,jstring managed_app_data_dir,bool is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,bool mount_data_dirs,bool mount_storage_dirs)1556 static void SpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray gids, jint runtime_flags,
1557                              jobjectArray rlimits, jlong permitted_capabilities,
1558                              jlong effective_capabilities, jint mount_external,
1559                              jstring managed_se_info, jstring managed_nice_name,
1560                              bool is_system_server, bool is_child_zygote,
1561                              jstring managed_instruction_set, jstring managed_app_data_dir,
1562                              bool is_top_app, jobjectArray pkg_data_info_list,
1563                              jobjectArray allowlisted_data_info_list, bool mount_data_dirs,
1564                              bool mount_storage_dirs) {
1565     const char* process_name = is_system_server ? "system_server" : "zygote";
1566     auto fail_fn = std::bind(ZygoteFailure, env, process_name, managed_nice_name, _1);
1567     auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1568 
1569     auto se_info = extract_fn(managed_se_info);
1570     auto nice_name = extract_fn(managed_nice_name);
1571     auto instruction_set = extract_fn(managed_instruction_set);
1572     auto app_data_dir = extract_fn(managed_app_data_dir);
1573 
1574     // Keep capabilities across UID change, unless we're staying root.
1575     if (uid != 0) {
1576         EnableKeepCapabilities(fail_fn);
1577     }
1578 
1579     SetInheritable(permitted_capabilities, fail_fn);
1580 
1581     DropCapabilitiesBoundingSet(fail_fn);
1582 
1583     bool need_pre_initialize_native_bridge = !is_system_server && instruction_set.has_value() &&
1584             android::NativeBridgeAvailable() &&
1585             // Native bridge may be already initialized if this
1586             // is an app forked from app-zygote.
1587             !android::NativeBridgeInitialized() &&
1588             android::NeedsNativeBridge(instruction_set.value().c_str());
1589 
1590     MountEmulatedStorage(uid, mount_external, need_pre_initialize_native_bridge, fail_fn);
1591 
1592     // Make sure app is running in its own mount namespace before isolating its data directories.
1593     ensureInAppMountNamespace(fail_fn);
1594 
1595     // Sandbox data and jit profile directories by overlaying a tmpfs on those dirs and bind
1596     // mount all related packages separately.
1597     if (mount_data_dirs) {
1598         isolateAppData(env, pkg_data_info_list, allowlisted_data_info_list, uid, process_name,
1599                        managed_nice_name, fail_fn);
1600         isolateJitProfile(env, pkg_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1601     }
1602     // MOUNT_EXTERNAL_INSTALLER, MOUNT_EXTERNAL_PASS_THROUGH, MOUNT_EXTERNAL_ANDROID_WRITABLE apps
1603     // will have mount_storage_dirs == false here (set by ProcessList.needsStorageDataIsolation()),
1604     // and hence they won't bind mount storage dirs.
1605     if (mount_storage_dirs) {
1606         BindMountStorageDirs(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1607                              fail_fn);
1608     }
1609 
1610     // If this zygote isn't root, it won't be able to create a process group,
1611     // since the directory is owned by root.
1612     if (!is_system_server && getuid() == 0) {
1613         const int rc = createProcessGroup(uid, getpid());
1614         if (rc == -EROFS) {
1615             ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
1616         } else if (rc != 0) {
1617             ALOGE("createProcessGroup(%d, %d) failed: %s", uid, /* pid= */ 0, strerror(-rc));
1618         }
1619     }
1620 
1621     SetGids(env, gids, is_child_zygote, fail_fn);
1622     SetRLimits(env, rlimits, fail_fn);
1623 
1624     if (need_pre_initialize_native_bridge) {
1625         // Due to the logic behind need_pre_initialize_native_bridge we know that
1626         // instruction_set contains a value.
1627         android::PreInitializeNativeBridge(app_data_dir.has_value() ? app_data_dir.value().c_str()
1628                                                                     : nullptr,
1629                                            instruction_set.value().c_str());
1630     }
1631 
1632     if (is_system_server) {
1633         // Prefetch the classloader for the system server. This is done early to
1634         // allow a tie-down of the proper system server selinux domain.
1635         env->CallStaticObjectMethod(gZygoteInitClass, gGetOrCreateSystemServerClassLoader);
1636         if (env->ExceptionCheck()) {
1637             // Be robust here. The Java code will attempt to create the classloader
1638             // at a later point (but may not have rights to use AoT artifacts).
1639             env->ExceptionClear();
1640         }
1641     }
1642 
1643     if (setresgid(gid, gid, gid) == -1) {
1644         fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
1645     }
1646 
1647     // Must be called when the new process still has CAP_SYS_ADMIN, in this case,
1648     // before changing uid from 0, which clears capabilities.  The other
1649     // alternative is to call prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that
1650     // breaks SELinux domain transition (see b/71859146).  As the result,
1651     // privileged syscalls used below still need to be accessible in app process.
1652     SetUpSeccompFilter(uid, is_child_zygote);
1653 
1654     // Must be called before losing the permission to set scheduler policy.
1655     SetSchedulerPolicy(fail_fn, is_top_app);
1656 
1657     if (setresuid(uid, uid, uid) == -1) {
1658         fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
1659     }
1660 
1661     // The "dumpable" flag of a process, which controls core dump generation, is
1662     // overwritten by the value in /proc/sys/fs/suid_dumpable when the effective
1663     // user or group ID changes. See proc(5) for possible values. In most cases,
1664     // the value is 0, so core dumps are disabled for zygote children. However,
1665     // when running in a Chrome OS container, the value is already set to 2,
1666     // which allows the external crash reporter to collect all core dumps. Since
1667     // only system crashes are interested, core dump is disabled for app
1668     // processes. This also ensures compliance with CTS.
1669     int dumpable = prctl(PR_GET_DUMPABLE);
1670     if (dumpable == -1) {
1671         ALOGE("prctl(PR_GET_DUMPABLE) failed: %s", strerror(errno));
1672         RuntimeAbort(env, __LINE__, "prctl(PR_GET_DUMPABLE) failed");
1673     }
1674 
1675     if (dumpable == 2 && uid >= AID_APP) {
1676         if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) == -1) {
1677             ALOGE("prctl(PR_SET_DUMPABLE, 0) failed: %s", strerror(errno));
1678             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 0) failed");
1679         }
1680     }
1681 
1682     // Set process properties to enable debugging if required.
1683     if ((runtime_flags & RuntimeFlags::DEBUG_ENABLE_JDWP) != 0) {
1684         EnableDebugger();
1685     }
1686     if ((runtime_flags & RuntimeFlags::PROFILE_FROM_SHELL) != 0) {
1687         // simpleperf needs the process to be dumpable to profile it.
1688         if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
1689             ALOGE("prctl(PR_SET_DUMPABLE) failed: %s", strerror(errno));
1690             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 1) failed");
1691         }
1692     }
1693 
1694     HeapTaggingLevel heap_tagging_level;
1695     switch (runtime_flags & RuntimeFlags::MEMORY_TAG_LEVEL_MASK) {
1696         case RuntimeFlags::MEMORY_TAG_LEVEL_TBI:
1697             heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
1698             break;
1699         case RuntimeFlags::MEMORY_TAG_LEVEL_ASYNC:
1700             heap_tagging_level = M_HEAP_TAGGING_LEVEL_ASYNC;
1701             break;
1702         case RuntimeFlags::MEMORY_TAG_LEVEL_SYNC:
1703             heap_tagging_level = M_HEAP_TAGGING_LEVEL_SYNC;
1704             break;
1705         default:
1706             heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
1707             break;
1708     }
1709     mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL, heap_tagging_level);
1710 
1711     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1712     // runtime.
1713     runtime_flags &= ~RuntimeFlags::MEMORY_TAG_LEVEL_MASK;
1714 
1715     // Avoid heap zero initialization for applications without MTE. Zero init may
1716     // cause app compat problems, use more memory, or reduce performance. While it
1717     // would be nice to have them for apps, we will have to wait until they are
1718     // proven out, have more efficient hardware, and/or apply them only to new
1719     // applications.
1720     if (!(runtime_flags & RuntimeFlags::NATIVE_HEAP_ZERO_INIT)) {
1721         mallopt(M_BIONIC_ZERO_INIT, 0);
1722     }
1723 
1724     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1725     // runtime.
1726     runtime_flags &= ~RuntimeFlags::NATIVE_HEAP_ZERO_INIT;
1727 
1728     bool forceEnableGwpAsan = false;
1729     switch (runtime_flags & RuntimeFlags::GWP_ASAN_LEVEL_MASK) {
1730         default:
1731         case RuntimeFlags::GWP_ASAN_LEVEL_NEVER:
1732             break;
1733         case RuntimeFlags::GWP_ASAN_LEVEL_ALWAYS:
1734             forceEnableGwpAsan = true;
1735             [[fallthrough]];
1736         case RuntimeFlags::GWP_ASAN_LEVEL_LOTTERY:
1737             android_mallopt(M_INITIALIZE_GWP_ASAN, &forceEnableGwpAsan, sizeof(forceEnableGwpAsan));
1738     }
1739     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1740     // runtime.
1741     runtime_flags &= ~RuntimeFlags::GWP_ASAN_LEVEL_MASK;
1742 
1743     if (NeedsNoRandomizeWorkaround()) {
1744         // Work around ARM kernel ASLR lossage (http://b/5817320).
1745         int old_personality = personality(0xffffffff);
1746         int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
1747         if (new_personality == -1) {
1748             ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
1749         }
1750     }
1751 
1752     SetCapabilities(permitted_capabilities, effective_capabilities, permitted_capabilities,
1753                     fail_fn);
1754 
1755     __android_log_close();
1756     AStatsSocket_close();
1757 
1758     const char* se_info_ptr = se_info.has_value() ? se_info.value().c_str() : nullptr;
1759     const char* nice_name_ptr = nice_name.has_value() ? nice_name.value().c_str() : nullptr;
1760 
1761     if (selinux_android_setcontext(uid, is_system_server, se_info_ptr, nice_name_ptr) == -1) {
1762         fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
1763                              is_system_server, se_info_ptr, nice_name_ptr));
1764     }
1765 
1766     // Make it easier to debug audit logs by setting the main thread's name to the
1767     // nice name rather than "app_process".
1768     if (nice_name.has_value()) {
1769         SetThreadName(nice_name.value());
1770     } else if (is_system_server) {
1771         SetThreadName("system_server");
1772     }
1773 
1774     // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
1775     UnsetChldSignalHandler();
1776 
1777     if (is_system_server) {
1778         env->CallStaticVoidMethod(gZygoteClass, gCallPostForkSystemServerHooks, runtime_flags);
1779         if (env->ExceptionCheck()) {
1780             fail_fn("Error calling post fork system server hooks.");
1781         }
1782 
1783         // TODO(b/117874058): Remove hardcoded label here.
1784         static const char* kSystemServerLabel = "u:r:system_server:s0";
1785         if (selinux_android_setcon(kSystemServerLabel) != 0) {
1786             fail_fn(CREATE_ERROR("selinux_android_setcon(%s)", kSystemServerLabel));
1787         }
1788     }
1789 
1790     if (is_child_zygote) {
1791         initUnsolSocketToSystemServer();
1792     }
1793 
1794     env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
1795                               is_system_server, is_child_zygote, managed_instruction_set);
1796 
1797     // Reset the process priority to the default value.
1798     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
1799 
1800     if (env->ExceptionCheck()) {
1801         fail_fn("Error calling post fork hooks.");
1802     }
1803 }
1804 
GetEffectiveCapabilityMask(JNIEnv * env)1805 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
1806     __user_cap_header_struct capheader;
1807     memset(&capheader, 0, sizeof(capheader));
1808     capheader.version = _LINUX_CAPABILITY_VERSION_3;
1809     capheader.pid = 0;
1810 
1811     __user_cap_data_struct capdata[2];
1812     if (capget(&capheader, &capdata[0]) == -1) {
1813         ALOGE("capget failed: %s", strerror(errno));
1814         RuntimeAbort(env, __LINE__, "capget failed");
1815     }
1816 
1817     return capdata[0].effective | (static_cast<uint64_t>(capdata[1].effective) << 32);
1818 }
1819 
CalculateCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids,bool is_child_zygote)1820 static jlong CalculateCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids,
1821                                    bool is_child_zygote) {
1822   jlong capabilities = 0;
1823 
1824   /*
1825    *  Grant the following capabilities to the Bluetooth user:
1826    *    - CAP_WAKE_ALARM
1827    *    - CAP_NET_ADMIN
1828    *    - CAP_NET_RAW
1829    *    - CAP_NET_BIND_SERVICE (for DHCP client functionality)
1830    *    - CAP_SYS_NICE (for setting RT priority for audio-related threads)
1831    */
1832 
1833   if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
1834     capabilities |= (1LL << CAP_WAKE_ALARM);
1835     capabilities |= (1LL << CAP_NET_ADMIN);
1836     capabilities |= (1LL << CAP_NET_RAW);
1837     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
1838     capabilities |= (1LL << CAP_SYS_NICE);
1839   }
1840 
1841   if (multiuser_get_app_id(uid) == AID_NETWORK_STACK) {
1842     capabilities |= (1LL << CAP_NET_ADMIN);
1843     capabilities |= (1LL << CAP_NET_BROADCAST);
1844     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
1845     capabilities |= (1LL << CAP_NET_RAW);
1846   }
1847 
1848   /*
1849    * Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
1850    */
1851 
1852   bool gid_wakelock_found = false;
1853   if (gid == AID_WAKELOCK) {
1854     gid_wakelock_found = true;
1855   } else if (gids != nullptr) {
1856     jsize gids_num = env->GetArrayLength(gids);
1857     ScopedIntArrayRO native_gid_proxy(env, gids);
1858 
1859     if (native_gid_proxy.get() == nullptr) {
1860       RuntimeAbort(env, __LINE__, "Bad gids array");
1861     }
1862 
1863     for (int gids_index = 0; gids_index < gids_num; ++gids_index) {
1864       if (native_gid_proxy[gids_index] == AID_WAKELOCK) {
1865         gid_wakelock_found = true;
1866         break;
1867       }
1868     }
1869   }
1870 
1871   if (gid_wakelock_found) {
1872     capabilities |= (1LL << CAP_BLOCK_SUSPEND);
1873   }
1874 
1875   /*
1876    * Grant child Zygote processes the following capabilities:
1877    *   - CAP_SETUID (change UID of child processes)
1878    *   - CAP_SETGID (change GID of child processes)
1879    *   - CAP_SETPCAP (change capabilities of child processes)
1880    */
1881 
1882   if (is_child_zygote) {
1883     capabilities |= (1LL << CAP_SETUID);
1884     capabilities |= (1LL << CAP_SETGID);
1885     capabilities |= (1LL << CAP_SETPCAP);
1886   }
1887 
1888   /*
1889    * Containers run without some capabilities, so drop any caps that are not
1890    * available.
1891    */
1892 
1893   return capabilities & GetEffectiveCapabilityMask(env);
1894 }
1895 
1896 /**
1897  * Adds the given information about a newly created unspecialized app
1898  * processes to the Zygote's USAP table.
1899  *
1900  * @param usap_pid  Process ID of the newly created USAP
1901  * @param read_pipe_fd  File descriptor for the read end of the USAP
1902  * reporting pipe.  Used in the ZygoteServer poll loop to track USAP
1903  * specialization.
1904  */
AddUsapTableEntry(pid_t usap_pid,int read_pipe_fd)1905 static void AddUsapTableEntry(pid_t usap_pid, int read_pipe_fd) {
1906   static int sUsapTableInsertIndex = 0;
1907 
1908   int search_index = sUsapTableInsertIndex;
1909   do {
1910     if (gUsapTable[search_index].SetIfInvalid(usap_pid, read_pipe_fd)) {
1911       ++gUsapPoolCount;
1912 
1913       // Start our next search right after where we finished this one.
1914       sUsapTableInsertIndex = (search_index + 1) % gUsapTable.size();
1915 
1916       return;
1917     }
1918 
1919     search_index = (search_index + 1) % gUsapTable.size();
1920   } while (search_index != sUsapTableInsertIndex);
1921 
1922   // Much like money in the banana stand, there should always be an entry
1923   // in the USAP table.
1924   __builtin_unreachable();
1925 }
1926 
1927 /**
1928  * Invalidates the entry in the USAPTable corresponding to the provided
1929  * process ID if it is present.  If an entry was removed the USAP pool
1930  * count is decremented. May be called from signal handler.
1931  *
1932  * @param usap_pid  Process ID of the USAP entry to invalidate
1933  * @return True if an entry was invalidated; false otherwise
1934  */
RemoveUsapTableEntry(pid_t usap_pid)1935 static bool RemoveUsapTableEntry(pid_t usap_pid) {
1936   for (UsapTableEntry& entry : gUsapTable) {
1937     if (entry.ClearForPID(usap_pid)) {
1938       --gUsapPoolCount;
1939       return true;
1940     }
1941   }
1942 
1943   return false;
1944 }
1945 
1946 /**
1947  * @return A vector of the read pipe FDs for each of the active USAPs.
1948  */
MakeUsapPipeReadFDVector()1949 std::vector<int> MakeUsapPipeReadFDVector() {
1950   std::vector<int> fd_vec;
1951   fd_vec.reserve(gUsapTable.size());
1952 
1953   for (UsapTableEntry& entry : gUsapTable) {
1954     auto entry_values = entry.GetValues();
1955 
1956     if (entry_values.has_value()) {
1957       fd_vec.push_back(entry_values.value().read_pipe_fd);
1958     }
1959   }
1960 
1961   return fd_vec;
1962 }
1963 
UnmountStorageOnInit(JNIEnv * env)1964 static void UnmountStorageOnInit(JNIEnv* env) {
1965   // Zygote process unmount root storage space initially before every child processes are forked.
1966   // Every forked child processes (include SystemServer) only mount their own root storage space
1967   // and no need unmount storage operation in MountEmulatedStorage method.
1968   // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
1969 
1970   // See storage config details at http://source.android.com/tech/storage/
1971   // Create private mount namespace shared by all children
1972   if (unshare(CLONE_NEWNS) == -1) {
1973     RuntimeAbort(env, __LINE__, "Failed to unshare()");
1974     return;
1975   }
1976 
1977   // Mark rootfs as being MS_SLAVE so that changes from default
1978   // namespace only flow into our children.
1979   if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
1980     RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
1981     return;
1982   }
1983 
1984   // Create a staging tmpfs that is shared by our children; they will
1985   // bind mount storage into their respective private namespaces, which
1986   // are isolated from each other.
1987   const char* target_base = getenv("EMULATED_STORAGE_TARGET");
1988   if (target_base != nullptr) {
1989 #define STRINGIFY_UID(x) __STRING(x)
1990     if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
1991               "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
1992       ALOGE("Failed to mount tmpfs to %s", target_base);
1993       RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
1994       return;
1995     }
1996 #undef STRINGIFY_UID
1997   }
1998 
1999   UnmountTree("/storage");
2000 }
2001 
2002 }  // anonymous namespace
2003 
2004 namespace android {
2005 
2006 /**
2007  * A failure function used to report fatal errors to the managed runtime.  This
2008  * function is often curried with the process name information and then passed
2009  * to called functions.
2010  *
2011  * @param env  Managed runtime environment
2012  * @param process_name  A native representation of the process name
2013  * @param managed_process_name  A managed representation of the process name
2014  * @param msg  The error message to be reported
2015  */
2016 [[noreturn]]
ZygoteFailure(JNIEnv * env,const char * process_name,jstring managed_process_name,const std::string & msg)2017 void zygote::ZygoteFailure(JNIEnv* env,
2018                            const char* process_name,
2019                            jstring managed_process_name,
2020                            const std::string& msg) {
2021   std::unique_ptr<ScopedUtfChars> scoped_managed_process_name_ptr = nullptr;
2022   if (managed_process_name != nullptr) {
2023     scoped_managed_process_name_ptr.reset(new ScopedUtfChars(env, managed_process_name));
2024     if (scoped_managed_process_name_ptr->c_str() != nullptr) {
2025       process_name = scoped_managed_process_name_ptr->c_str();
2026     }
2027   }
2028 
2029   const std::string& error_msg =
2030       (process_name == nullptr || process_name[0] == '\0') ?
2031       msg : StringPrintf("(%s) %s", process_name, msg.c_str());
2032 
2033   env->FatalError(error_msg.c_str());
2034   __builtin_unreachable();
2035 }
2036 
// Set of file descriptors that nativeForkAndSpecialize appends to its
// fds_to_ignore list before forking. Null until it is populated; the code that
// allocates and fills it is outside this section.
static std::set<int>* gPreloadFds = nullptr;
// Gate for the set above: nativeForkAndSpecialize only consults gPreloadFds
// when this flag is true.
static bool gPreloadFdsExtracted = false;
2039 
2040 // Utility routine to fork a process from the zygote.
ForkCommon(JNIEnv * env,bool is_system_server,const std::vector<int> & fds_to_close,const std::vector<int> & fds_to_ignore,bool is_priority_fork,bool purge)2041 pid_t zygote::ForkCommon(JNIEnv* env, bool is_system_server,
2042                          const std::vector<int>& fds_to_close,
2043                          const std::vector<int>& fds_to_ignore,
2044                          bool is_priority_fork,
2045                          bool purge) {
2046   SetSignalHandlers();
2047 
2048   // Curry a failure function.
2049   auto fail_fn = std::bind(zygote::ZygoteFailure, env,
2050                            is_system_server ? "system_server" : "zygote",
2051                            nullptr, _1);
2052 
2053   // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
2054   // log, which would result in the logging FDs we close being reopened.
2055   // This would cause failures because the FDs are not allowlisted.
2056   //
2057   // Note that the zygote process is single threaded at this point.
2058   BlockSignal(SIGCHLD, fail_fn);
2059 
2060   // Close any logging related FDs before we start evaluating the list of
2061   // file descriptors.
2062   __android_log_close();
2063   AStatsSocket_close();
2064 
2065   // If this is the first fork for this zygote, create the open FD table,
2066   // verifying that files are of supported type and allowlisted.  Otherwise (not
2067   // the first fork), check that the open files have not changed.  Newly open
2068   // files are not expected, and will be disallowed in the future.  Currently
2069   // they are allowed if they pass the same checks as in the
2070   // FileDescriptorTable::Create() above.
2071   if (gOpenFdTable == nullptr) {
2072     gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, fail_fn);
2073   } else {
2074     gOpenFdTable->Restat(fds_to_ignore, fail_fn);
2075   }
2076 
2077   android_fdsan_error_level fdsan_error_level = android_fdsan_get_error_level();
2078 
2079   if (purge) {
2080     // Purge unused native memory in an attempt to reduce the amount of false
2081     // sharing with the child process.  By reducing the size of the libc_malloc
2082     // region shared with the child process we reduce the number of pages that
2083     // transition to the private-dirty state when malloc adjusts the meta-data
2084     // on each of the pages it is managing after the fork.
2085     mallopt(M_PURGE, 0);
2086   }
2087 
2088   pid_t pid = fork();
2089 
2090   if (pid == 0) {
2091     if (is_priority_fork) {
2092       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2093     } else {
2094       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MIN);
2095     }
2096 
2097     // The child process.
2098     PreApplicationInit();
2099 
2100     // Clean up any descriptors which must be closed immediately
2101     DetachDescriptors(env, fds_to_close, fail_fn);
2102 
2103     // Invalidate the entries in the USAP table.
2104     ClearUsapTable();
2105 
2106     // Re-open all remaining open file descriptors so that they aren't shared
2107     // with the zygote across a fork.
2108     gOpenFdTable->ReopenOrDetach(fail_fn);
2109 
2110     // Turn fdsan back on.
2111     android_fdsan_set_error_level(fdsan_error_level);
2112 
2113     // Reset the fd to the unsolicited zygote socket
2114     gSystemServerSocketFd = -1;
2115   } else {
2116     ALOGD("Forked child process %d", pid);
2117   }
2118 
2119   // We blocked SIGCHLD prior to a fork, we unblock it here.
2120   UnblockSignal(SIGCHLD, fail_fn);
2121 
2122   return pid;
2123 }
2124 
com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv *,jclass)2125 static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
2126   PreApplicationInit();
2127 }
2128 
com_android_internal_os_Zygote_nativeForkAndSpecialize(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jintArray managed_fds_to_close,jintArray managed_fds_to_ignore,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs)2129 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
2130         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2131         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2132         jintArray managed_fds_to_close, jintArray managed_fds_to_ignore, jboolean is_child_zygote,
2133         jstring instruction_set, jstring app_data_dir, jboolean is_top_app,
2134         jobjectArray pkg_data_info_list, jobjectArray allowlisted_data_info_list,
2135         jboolean mount_data_dirs, jboolean mount_storage_dirs) {
2136     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2137 
2138     if (UNLIKELY(managed_fds_to_close == nullptr)) {
2139       zygote::ZygoteFailure(env, "zygote", nice_name,
2140                             "Zygote received a null fds_to_close vector.");
2141     }
2142 
2143     std::vector<int> fds_to_close =
2144         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_close).value();
2145     std::vector<int> fds_to_ignore =
2146         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_ignore)
2147             .value_or(std::vector<int>());
2148 
2149     std::vector<int> usap_pipes = MakeUsapPipeReadFDVector();
2150 
2151     fds_to_close.insert(fds_to_close.end(), usap_pipes.begin(), usap_pipes.end());
2152     fds_to_ignore.insert(fds_to_ignore.end(), usap_pipes.begin(), usap_pipes.end());
2153 
2154     fds_to_close.push_back(gUsapPoolSocketFD);
2155 
2156     if (gUsapPoolEventFD != -1) {
2157       fds_to_close.push_back(gUsapPoolEventFD);
2158       fds_to_ignore.push_back(gUsapPoolEventFD);
2159     }
2160 
2161     if (gSystemServerSocketFd != -1) {
2162         fds_to_close.push_back(gSystemServerSocketFd);
2163         fds_to_ignore.push_back(gSystemServerSocketFd);
2164     }
2165 
2166     if (gPreloadFds && gPreloadFdsExtracted) {
2167         fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2168     }
2169 
2170     pid_t pid = zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close, fds_to_ignore,
2171                                    true);
2172 
2173     if (pid == 0) {
2174         SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2175                          mount_external, se_info, nice_name, false, is_child_zygote == JNI_TRUE,
2176                          instruction_set, app_data_dir, is_top_app == JNI_TRUE, pkg_data_info_list,
2177                          allowlisted_data_info_list, mount_data_dirs == JNI_TRUE,
2178                          mount_storage_dirs == JNI_TRUE);
2179     }
2180     return pid;
2181 }
2182 
com_android_internal_os_Zygote_nativeForkSystemServer(JNIEnv * env,jclass,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities)2183 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
2184         JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
2185         jint runtime_flags, jobjectArray rlimits, jlong permitted_capabilities,
2186         jlong effective_capabilities) {
2187   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2188                    fds_to_ignore(fds_to_close);
2189 
2190   fds_to_close.push_back(gUsapPoolSocketFD);
2191 
2192   if (gUsapPoolEventFD != -1) {
2193     fds_to_close.push_back(gUsapPoolEventFD);
2194     fds_to_ignore.push_back(gUsapPoolEventFD);
2195   }
2196 
2197   if (gSystemServerSocketFd != -1) {
2198       fds_to_close.push_back(gSystemServerSocketFd);
2199       fds_to_ignore.push_back(gSystemServerSocketFd);
2200   }
2201 
2202   pid_t pid = zygote::ForkCommon(env, true,
2203                                  fds_to_close,
2204                                  fds_to_ignore,
2205                                  true);
2206   if (pid == 0) {
2207       // System server prcoess does not need data isolation so no need to
2208       // know pkg_data_info_list.
2209       SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, permitted_capabilities,
2210                        effective_capabilities, MOUNT_EXTERNAL_DEFAULT, nullptr, nullptr, true,
2211                        false, nullptr, nullptr, /* is_top_app= */ false,
2212                        /* pkg_data_info_list */ nullptr,
2213                        /* allowlisted_data_info_list */ nullptr, false, false);
2214   } else if (pid > 0) {
2215       // The zygote process checks whether the child process has died or not.
2216       ALOGI("System server process %d has been created", pid);
2217       gSystemServerPid = pid;
2218       // There is a slight window that the system server process has crashed
2219       // but it went unnoticed because we haven't published its pid yet. So
2220       // we recheck here just to make sure that all is well.
2221       int status;
2222       if (waitpid(pid, &status, WNOHANG) == pid) {
2223           ALOGE("System server process %d has died. Restarting Zygote!", pid);
2224           RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
2225       }
2226 
2227       if (UsePerAppMemcg()) {
2228           // Assign system_server to the correct memory cgroup.
2229           // Not all devices mount memcg so check if it is mounted first
2230           // to avoid unnecessarily printing errors and denials in the logs.
2231           if (!SetTaskProfiles(pid, std::vector<std::string>{"SystemMemoryProcess"})) {
2232               ALOGE("couldn't add process %d into system memcg group", pid);
2233           }
2234       }
2235   }
2236   return pid;
2237 }
2238 
2239 /**
2240  * A JNI function that forks an unspecialized app process from the Zygote while
2241  * ensuring proper file descriptor hygiene.
2242  *
2243  * @param env  Managed runtime environment
2244  * @param read_pipe_fd  The read FD for the USAP reporting pipe.  Manually closed by the child
2245  * in managed code. -1 indicates none.
2246  * @param write_pipe_fd  The write FD for the USAP reporting pipe.  Manually closed by the
2247  * zygote in managed code. -1 indicates none.
2248  * @param managed_session_socket_fds  A list of anonymous session sockets that must be ignored by
2249  * the FD hygiene code and automatically "closed" in the new USAP.
2250  * @param args_known Arguments for specialization are available; no need to read from a socket
2251  * @param is_priority_fork  Controls the nice level assigned to the newly created process
2252  * @return child pid in the parent, 0 in the child
2253  */
com_android_internal_os_Zygote_nativeForkApp(JNIEnv * env,jclass,jint read_pipe_fd,jint write_pipe_fd,jintArray managed_session_socket_fds,jboolean args_known,jboolean is_priority_fork)2254 static jint com_android_internal_os_Zygote_nativeForkApp(JNIEnv* env,
2255                                                          jclass,
2256                                                          jint read_pipe_fd,
2257                                                          jint write_pipe_fd,
2258                                                          jintArray managed_session_socket_fds,
2259                                                          jboolean args_known,
2260                                                          jboolean is_priority_fork) {
2261   std::vector<int> session_socket_fds =
2262       ExtractJIntArray(env, "USAP", nullptr, managed_session_socket_fds)
2263           .value_or(std::vector<int>());
2264   return zygote::forkApp(env, read_pipe_fd, write_pipe_fd, session_socket_fds,
2265                             args_known == JNI_TRUE, is_priority_fork == JNI_TRUE, true);
2266 }
2267 
forkApp(JNIEnv * env,int read_pipe_fd,int write_pipe_fd,const std::vector<int> & session_socket_fds,bool args_known,bool is_priority_fork,bool purge)2268 int zygote::forkApp(JNIEnv* env,
2269                     int read_pipe_fd,
2270                     int write_pipe_fd,
2271                     const std::vector<int>& session_socket_fds,
2272                     bool args_known,
2273                     bool is_priority_fork,
2274                     bool purge) {
2275 
2276   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2277                    fds_to_ignore(fds_to_close);
2278 
2279   fds_to_close.push_back(gZygoteSocketFD);
2280   if (gSystemServerSocketFd != -1) {
2281       fds_to_close.push_back(gSystemServerSocketFd);
2282   }
2283   if (args_known) {
2284       fds_to_close.push_back(gUsapPoolSocketFD);
2285   }
2286   fds_to_close.insert(fds_to_close.end(), session_socket_fds.begin(), session_socket_fds.end());
2287 
2288   fds_to_ignore.push_back(gUsapPoolSocketFD);
2289   fds_to_ignore.push_back(gZygoteSocketFD);
2290   if (read_pipe_fd != -1) {
2291       fds_to_ignore.push_back(read_pipe_fd);
2292   }
2293   if (write_pipe_fd != -1) {
2294       fds_to_ignore.push_back(write_pipe_fd);
2295   }
2296   fds_to_ignore.insert(fds_to_ignore.end(), session_socket_fds.begin(), session_socket_fds.end());
2297 
2298   if (gUsapPoolEventFD != -1) {
2299       fds_to_close.push_back(gUsapPoolEventFD);
2300       fds_to_ignore.push_back(gUsapPoolEventFD);
2301   }
2302   if (gSystemServerSocketFd != -1) {
2303       if (args_known) {
2304           fds_to_close.push_back(gSystemServerSocketFd);
2305       }
2306       fds_to_ignore.push_back(gSystemServerSocketFd);
2307   }
2308   if (gPreloadFds && gPreloadFdsExtracted) {
2309       fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2310   }
2311 
2312   return zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close,
2313                             fds_to_ignore, is_priority_fork == JNI_TRUE, purge);
2314 }
2315 
com_android_internal_os_Zygote_nativeAllowFileAcrossFork(JNIEnv * env,jclass,jstring path)2316 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
2317         JNIEnv* env, jclass, jstring path) {
2318     ScopedUtfChars path_native(env, path);
2319     const char* path_cstr = path_native.c_str();
2320     if (!path_cstr) {
2321         RuntimeAbort(env, __LINE__, "path_cstr == nullptr");
2322     }
2323     FileDescriptorAllowlist::Get()->Allow(path_cstr);
2324 }
2325 
com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(JNIEnv * env,jclass,jint uidGidMin,jint uidGidMax)2326 static void com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(
2327         JNIEnv* env, jclass, jint uidGidMin, jint uidGidMax) {
2328   if (!gIsSecurityEnforced) {
2329     ALOGI("seccomp disabled by setenforce 0");
2330     return;
2331   }
2332 
2333   bool installed = install_setuidgid_seccomp_filter(uidGidMin, uidGidMax);
2334   if (!installed) {
2335       RuntimeAbort(env, __LINE__, "Could not install setuid/setgid seccomp filter.");
2336   }
2337 }
2338 
2339 /**
2340  * Called from an unspecialized app process to specialize the process for a
2341  * given application.
2342  *
2343  * @param env  Managed runtime environment
2344  * @param uid  User ID of the new application
2345  * @param gid  Group ID of the new application
2346  * @param gids  Extra groups that the process belongs to
2347  * @param runtime_flags  Flags for changing the behavior of the managed runtime
2348  * @param rlimits  Resource limits
2349  * @param mount_external  The mode (read/write/normal) that external storage will be mounted with
2350  * @param se_info  SELinux policy information
2351  * @param nice_name  New name for this process
2352  * @param is_child_zygote  If the process is to become a WebViewZygote
2353  * @param instruction_set  The instruction set expected/requested by the new application
2354  * @param app_data_dir  Path to the application's data directory
2355  * @param is_top_app  If the process is for top (high priority) application
2356  */
com_android_internal_os_Zygote_nativeSpecializeAppProcess(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs)2357 static void com_android_internal_os_Zygote_nativeSpecializeAppProcess(
2358         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2359         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2360         jboolean is_child_zygote, jstring instruction_set, jstring app_data_dir,
2361         jboolean is_top_app, jobjectArray pkg_data_info_list,
2362         jobjectArray allowlisted_data_info_list, jboolean mount_data_dirs,
2363         jboolean mount_storage_dirs) {
2364     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2365 
2366     SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2367                      mount_external, se_info, nice_name, false, is_child_zygote == JNI_TRUE,
2368                      instruction_set, app_data_dir, is_top_app == JNI_TRUE, pkg_data_info_list,
2369                      allowlisted_data_info_list, mount_data_dirs == JNI_TRUE,
2370                      mount_storage_dirs == JNI_TRUE);
2371 }
2372 
2373 /**
2374  * A helper method for fetching socket file descriptors that were opened by init from the
2375  * environment.
2376  *
2377  * @param env  Managed runtime environment
2378  * @param is_primary  If this process is the primary or secondary Zygote; used to compute the name
2379  * of the environment variable storing the file descriptors.
2380  */
com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv * env,jclass,jboolean is_primary)2381 static void com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv* env, jclass,
2382                                                                  jboolean is_primary) {
2383   /*
2384    * Obtain file descriptors created by init from the environment.
2385    */
2386 
2387   gZygoteSocketFD =
2388       android_get_control_socket(is_primary ? "zygote" : "zygote_secondary");
2389   if (gZygoteSocketFD >= 0) {
2390     ALOGV("Zygote:zygoteSocketFD = %d", gZygoteSocketFD);
2391   } else {
2392     ALOGE("Unable to fetch Zygote socket file descriptor");
2393   }
2394 
2395   gUsapPoolSocketFD =
2396       android_get_control_socket(is_primary ? "usap_pool_primary" : "usap_pool_secondary");
2397   if (gUsapPoolSocketFD >= 0) {
2398     ALOGV("Zygote:usapPoolSocketFD = %d", gUsapPoolSocketFD);
2399   } else {
2400     ALOGE("Unable to fetch USAP pool socket file descriptor");
2401   }
2402 
2403   initUnsolSocketToSystemServer();
2404 
2405   /*
2406    * Security Initialization
2407    */
2408 
2409   // security_getenforce is not allowed on app process. Initialize and cache
2410   // the value before zygote forks.
2411   gIsSecurityEnforced = security_getenforce();
2412 
2413   selinux_android_seapp_context_init();
2414 
2415   /*
2416    * Storage Initialization
2417    */
2418 
2419   UnmountStorageOnInit(env);
2420 
2421   /*
2422    * Performance Initialization
2423    */
2424 
2425   if (!SetTaskProfiles(0, {})) {
2426     zygote::ZygoteFailure(env, "zygote", nullptr, "Zygote SetTaskProfiles failed");
2427   }
2428 }
2429 
2430 /**
2431  * @param env  Managed runtime environment
2432  * @return  A managed array of raw file descriptors for the read ends of the USAP reporting
2433  * pipes.
2434  */
com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv * env,jclass)2435 static jintArray com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv* env, jclass) {
2436   std::vector<int> usap_fds = MakeUsapPipeReadFDVector();
2437 
2438   jintArray managed_usap_fds = env->NewIntArray(usap_fds.size());
2439   env->SetIntArrayRegion(managed_usap_fds, 0, usap_fds.size(), usap_fds.data());
2440 
2441   return managed_usap_fds;
2442 }
2443 
2444 /*
2445  * Add the given pid and file descriptor to the Usap table. CriticalNative method.
2446  */
com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid,jint read_pipe_fd)2447 static void com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid, jint read_pipe_fd) {
2448   AddUsapTableEntry(pid, read_pipe_fd);
2449 }
2450 
2451 /**
2452  * A JNI wrapper around RemoveUsapTableEntry. CriticalNative method.
2453  *
2454  * @param env  Managed runtime environment
2455  * @param usap_pid  Process ID of the USAP entry to invalidate
2456  * @return  True if an entry was invalidated; false otherwise.
2457  */
com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid)2458 static jboolean com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid) {
2459   return RemoveUsapTableEntry(usap_pid);
2460 }
2461 
2462 /**
2463  * Creates the USAP pool event FD if it doesn't exist and returns it.  This is used by the
2464  * ZygoteServer poll loop to know when to re-fill the USAP pool.
2465  *
2466  * @param env  Managed runtime environment
2467  * @return A raw event file descriptor used to communicate (from the signal handler) when the
2468  * Zygote receives a SIGCHLD for a USAP
2469  */
com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv * env,jclass)2470 static jint com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv* env, jclass) {
2471   if (gUsapPoolEventFD == -1) {
2472     if ((gUsapPoolEventFD = eventfd(0, 0)) == -1) {
2473       zygote::ZygoteFailure(env, "zygote", nullptr,
2474                             StringPrintf("Unable to create eventfd: %s", strerror(errno)));
2475     }
2476   }
2477 
2478   return gUsapPoolEventFD;
2479 }
2480 
2481 /**
2482  * @param env  Managed runtime environment
2483  * @return The number of USAPs currently in the USAP pool
2484  */
com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv * env,jclass)2485 static jint com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv* env, jclass) {
2486   return gUsapPoolCount;
2487 }
2488 
2489 /**
2490  * Kills all processes currently in the USAP pool and closes their read pipe
2491  * FDs.
2492  *
2493  * @param env  Managed runtime environment
2494  */
com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv * env,jclass)2495 static void com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv* env, jclass) {
2496   for (auto& entry : gUsapTable) {
2497     auto entry_storage = entry.GetValues();
2498 
2499     if (entry_storage.has_value()) {
2500       kill(entry_storage.value().pid, SIGTERM);
2501 
2502       // Clean up the USAP table entry here.  This avoids a potential race
2503       // where a newly created USAP might not be able to find a valid table
2504       // entry if signal handler (which would normally do the cleanup) doesn't
2505       // run between now and when the new process is created.
2506 
2507       close(entry_storage.value().read_pipe_fd);
2508 
2509       // Avoid a second atomic load by invalidating instead of clearing.
2510       entry.Invalidate();
2511       --gUsapPoolCount;
2512     }
2513   }
2514 }
2515 
com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv * env,jclass)2516 static void com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv* env, jclass) {
2517   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2518   BlockSignal(SIGTERM, fail_fn);
2519 }
2520 
com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv * env,jclass)2521 static void com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv* env, jclass) {
2522   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2523   UnblockSignal(SIGTERM, fail_fn);
2524 }
2525 
com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv * env,jclass)2526 static void com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv* env, jclass) {
2527   setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2528 }
2529 
com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv * env,jclass,jbyteArray in,jint length,jintArray out)2530 static jint com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv* env, jclass, jbyteArray in,
2531                                                               jint length, jintArray out) {
2532     if (length != sizeof(struct UnsolicitedZygoteMessageSigChld)) {
2533         // Apparently it's not the message we are expecting.
2534         return -1;
2535     }
2536     if (in == nullptr || out == nullptr) {
2537         // Invalid parameter
2538         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2539         return -1;
2540     }
2541     ScopedByteArrayRO source(env, in);
2542     if (source.size() < length) {
2543         // Invalid parameter
2544         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2545         return -1;
2546     }
2547     const struct UnsolicitedZygoteMessageSigChld* msg =
2548             reinterpret_cast<const struct UnsolicitedZygoteMessageSigChld*>(source.get());
2549 
2550     switch (msg->header.type) {
2551         case UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD: {
2552             ScopedIntArrayRW buf(env, out);
2553             if (buf.size() != 3) {
2554                 jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2555                 return UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED;
2556             }
2557             buf[0] = msg->payload.pid;
2558             buf[1] = msg->payload.uid;
2559             buf[2] = msg->payload.status;
2560             return 3;
2561         }
2562         default:
2563             break;
2564     }
2565     return -1;
2566 }
2567 
com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv * env,jclass)2568 static jboolean com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv* env, jclass) {
2569 #if defined(__aarch64__)
2570   return mte_supported();
2571 #else
2572   return false;
2573 #endif
2574 }
2575 
com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv * env,jclass)2576 static jboolean com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv* env, jclass) {
2577 #ifdef __aarch64__
2578   int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2579   return res >= 0 && res & PR_TAGGED_ADDR_ENABLE;
2580 #else
2581   return false;
2582 #endif
2583 }
2584 
com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv * env,jclass)2585 static jint com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv* env, jclass) {
2586 #if defined(__aarch64__)
2587   int level = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2588   if (level < 0) {
2589     ALOGE("Failed to get memory tag level: %s", strerror(errno));
2590     return 0;
2591   } else if (!(level & PR_TAGGED_ADDR_ENABLE)) {
2592     return 0;
2593   }
2594   // TBI is only possible on non-MTE hardware.
2595   if (!mte_supported()) {
2596     return MEMORY_TAG_LEVEL_TBI;
2597   }
2598 
2599   switch (level & PR_MTE_TCF_MASK) {
2600     case PR_MTE_TCF_NONE:
2601       return 0;
2602     case PR_MTE_TCF_SYNC:
2603       return MEMORY_TAG_LEVEL_SYNC;
2604     case PR_MTE_TCF_ASYNC:
2605     case PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC:
2606       return MEMORY_TAG_LEVEL_ASYNC;
2607     default:
2608       ALOGE("Unknown memory tagging level: %i", level);
2609       return 0;
2610   }
2611 #else // defined(__aarch64__)
2612   return 0;
2613 #endif // defined(__aarch64__)
2614 }
2615 
com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv * env,jclass)2616 static void com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv* env, jclass) {
2617     // Ignore invocations when too early or too late.
2618     if (gPreloadFds) {
2619         return;
2620     }
2621 
2622     // App Zygote Preload starts soon. Save FDs remaining open.  After the
2623     // preload finishes newly open files will be determined.
2624     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
2625     gPreloadFds = GetOpenFds(fail_fn).release();
2626 }
2627 
com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv * env,jclass)2628 static void com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv* env, jclass) {
2629     // Ignore invocations when too early or too late.
2630     if (!gPreloadFds || gPreloadFdsExtracted) {
2631         return;
2632     }
2633 
2634     // Find the newly open FDs, if any.
2635     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
2636     std::unique_ptr<std::set<int>> current_fds = GetOpenFds(fail_fn);
2637     auto difference = std::make_unique<std::set<int>>();
2638     std::set_difference(current_fds->begin(), current_fds->end(), gPreloadFds->begin(),
2639                         gPreloadFds->end(), std::inserter(*difference, difference->end()));
2640     delete gPreloadFds;
2641     gPreloadFds = difference.release();
2642     gPreloadFdsExtracted = true;
2643 }
2644 
2645 static const JNINativeMethod gMethods[] = {
2646         {"nativeForkAndSpecialize",
2647          "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/"
2648          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZ)I",
2649          (void*)com_android_internal_os_Zygote_nativeForkAndSpecialize},
2650         {"nativeForkSystemServer", "(II[II[[IJJ)I",
2651          (void*)com_android_internal_os_Zygote_nativeForkSystemServer},
2652         {"nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
2653          (void*)com_android_internal_os_Zygote_nativeAllowFileAcrossFork},
2654         {"nativePreApplicationInit", "()V",
2655          (void*)com_android_internal_os_Zygote_nativePreApplicationInit},
2656         {"nativeInstallSeccompUidGidFilter", "(II)V",
2657          (void*)com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter},
2658         {"nativeForkApp", "(II[IZZ)I", (void*)com_android_internal_os_Zygote_nativeForkApp},
2659         // @CriticalNative
2660         {"nativeAddUsapTableEntry", "(II)V",
2661          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
2662         {"nativeSpecializeAppProcess",
2663          "(II[II[[IILjava/lang/String;Ljava/lang/String;ZLjava/lang/String;Ljava/lang/"
2664          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZ)V",
2665          (void*)com_android_internal_os_Zygote_nativeSpecializeAppProcess},
2666         {"nativeInitNativeState", "(Z)V",
2667          (void*)com_android_internal_os_Zygote_nativeInitNativeState},
2668         {"nativeGetUsapPipeFDs", "()[I",
2669          (void*)com_android_internal_os_Zygote_nativeGetUsapPipeFDs},
2670         // @CriticalNative
2671         {"nativeAddUsapTableEntry", "(II)V",
2672          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
2673         // @CriticalNative
2674         {"nativeRemoveUsapTableEntry", "(I)Z",
2675          (void*)com_android_internal_os_Zygote_nativeRemoveUsapTableEntry},
2676         {"nativeGetUsapPoolEventFD", "()I",
2677          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolEventFD},
2678         {"nativeGetUsapPoolCount", "()I",
2679          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolCount},
2680         {"nativeEmptyUsapPool", "()V", (void*)com_android_internal_os_Zygote_nativeEmptyUsapPool},
2681         {"nativeBlockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeBlockSigTerm},
2682         {"nativeUnblockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeUnblockSigTerm},
2683         {"nativeBoostUsapPriority", "()V",
2684          (void*)com_android_internal_os_Zygote_nativeBoostUsapPriority},
2685         {"nativeParseSigChld", "([BI[I)I",
2686          (void*)com_android_internal_os_Zygote_nativeParseSigChld},
2687         {"nativeSupportsMemoryTagging", "()Z",
2688          (void*)com_android_internal_os_Zygote_nativeSupportsMemoryTagging},
2689         {"nativeSupportsTaggedPointers", "()Z",
2690          (void*)com_android_internal_os_Zygote_nativeSupportsTaggedPointers},
2691         {"nativeCurrentTaggingLevel", "()I",
2692          (void*)com_android_internal_os_Zygote_nativeCurrentTaggingLevel},
2693         {"nativeMarkOpenedFilesBeforePreload", "()V",
2694          (void*)com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload},
2695         {"nativeAllowFilesOpenedByPreload", "()V",
2696          (void*)com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload},
2697 };
2698 
register_com_android_internal_os_Zygote(JNIEnv * env)2699 int register_com_android_internal_os_Zygote(JNIEnv* env) {
2700   gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
2701   gCallPostForkSystemServerHooks = GetStaticMethodIDOrDie(env, gZygoteClass,
2702                                                           "callPostForkSystemServerHooks",
2703                                                           "(I)V");
2704   gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
2705                                                    "(IZZLjava/lang/String;)V");
2706 
2707   gZygoteInitClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteInitClassName));
2708   gGetOrCreateSystemServerClassLoader =
2709           GetStaticMethodIDOrDie(env, gZygoteInitClass, "getOrCreateSystemServerClassLoader",
2710                                  "()Ljava/lang/ClassLoader;");
2711 
2712   RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
2713 
2714   return JNI_OK;
2715 }
2716 }  // namespace android
2717