• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Zygote"
18 #define ATRACE_TAG ATRACE_TAG_DALVIK
19 
20 #include "com_android_internal_os_Zygote.h"
21 
22 #include <async_safe/log.h>
23 
24 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
25 #include <sys/mount.h>
26 #include <linux/fs.h>
27 #include <sys/types.h>
28 #include <dirent.h>
29 
30 #include <algorithm>
31 #include <array>
32 #include <atomic>
33 #include <functional>
34 #include <iterator>
35 #include <list>
36 #include <optional>
37 #include <sstream>
38 #include <string>
39 #include <string_view>
40 #include <unordered_set>
41 
42 #include <android/fdsan.h>
43 #include <arpa/inet.h>
44 #include <fcntl.h>
45 #include <grp.h>
46 #include <inttypes.h>
47 #include <malloc.h>
48 #include <mntent.h>
49 #include <paths.h>
50 #include <signal.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <sys/auxv.h>
54 #include <sys/capability.h>
55 #include <sys/cdefs.h>
56 #include <sys/eventfd.h>
57 #include <sys/personality.h>
58 #include <sys/prctl.h>
59 #include <sys/resource.h>
60 #include <sys/socket.h>
61 #include <sys/stat.h>
62 #include <sys/time.h>
63 #include <sys/types.h>
64 #include <sys/un.h>
65 #include <sys/wait.h>
66 #include <unistd.h>
67 
68 #include <android-base/file.h>
69 #include <android-base/logging.h>
70 #include <android-base/properties.h>
71 #include <android-base/stringprintf.h>
72 #include <android-base/unique_fd.h>
73 #include <bionic/malloc.h>
74 #include <bionic/mte.h>
75 #include <cutils/fs.h>
76 #include <cutils/multiuser.h>
77 #include <cutils/sockets.h>
78 #include <private/android_filesystem_config.h>
79 #include <processgroup/processgroup.h>
80 #include <processgroup/sched_policy.h>
81 #include <seccomp_policy.h>
82 #include <selinux/android.h>
83 #include <stats_socket.h>
84 #include <utils/String8.h>
85 #include <utils/Trace.h>
86 
87 #include <nativehelper/JNIHelp.h>
88 #include <nativehelper/ScopedLocalRef.h>
89 #include <nativehelper/ScopedPrimitiveArray.h>
90 #include <nativehelper/ScopedUtfChars.h>
91 #include "core_jni_helpers.h"
92 #include "fd_utils.h"
93 #include "filesystem_utils.h"
94 
95 #include "nativebridge/native_bridge.h"
96 
97 namespace {
98 
99 // TODO (chriswailes): Add a function to initialize native Zygote data.
100 // TODO (chriswailes): Fix mixed indentation style (2 and 4 spaces).
101 
102 using namespace std::placeholders;
103 
104 using android::String8;
105 using android::base::ReadFileToString;
106 using android::base::StringAppendF;
107 using android::base::StringPrintf;
108 using android::base::WriteStringToFile;
109 using android::base::GetBoolProperty;
110 
111 using android::zygote::ZygoteFailure;
112 
113 using Action = android_mallopt_gwp_asan_options_t::Action;
114 
115 // This type is duplicated in fd_utils.h
116 typedef const std::function<void(std::string)>& fail_fn_t;
117 
/**
 * PID that SigChldHandler() compares every reaped child against; when a
 * reaped child matches, the zygote kills itself so that init restarts it.
 * It is not assigned anywhere in this part of the file.
 */
static pid_t gSystemServerPid = 0;

/** System property read (default false) while mounting emulated storage. */
static constexpr const char* kVoldAppDataIsolation = "persist.sys.vold_app_data_isolation_enabled";
/** JNI (slash-separated) name of the Java Zygote class. */
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Cached JNI handles for the Zygote class and two of its methods.
// NOTE(review): not assigned in this part of the file - presumably resolved
// once during JNI registration; confirm against the registration code.
static jclass gZygoteClass;
static jmethodID gCallPostForkSystemServerHooks;
static jmethodID gCallPostForkChildHooks;

/** JNI (slash-separated) name of the Java ZygoteInit class. */
static constexpr const char* kZygoteInitClassName = "com/android/internal/os/ZygoteInit";
// Cached JNI handles for the ZygoteInit class and two of its methods.
// NOTE(review): same caveat as above - the assignment is not visible here.
static jclass gZygoteInitClass;
static jmethodID gGetOrCreateSystemServerClassLoader;
static jmethodID gPrefetchStandaloneSystemServerJars;

/**
 * When false, SetUpSeccompFilter() installs no seccomp filter and logs
 * "seccomp disabled by setenforce 0" instead.
 */
static bool gIsSecurityEnforced = true;
132 
133 /**
134  * True if the app process is running in its mount namespace.
135  */
136 static bool gInAppMountNamespace = false;
137 
138 /**
139  * The maximum number of characters (not including a null terminator) that a
140  * process name may contain.
141  */
142 static constexpr size_t MAX_NAME_LENGTH = 15;
143 
144 /**
145  * The file descriptor for the Zygote socket opened by init.
146  */
147 
148 static int gZygoteSocketFD = -1;
149 
150 /**
151  * The file descriptor for the unspecialized app process (USAP) pool socket opened by init.
152  */
153 
154 static int gUsapPoolSocketFD = -1;
155 
156 /**
157  * The number of USAPs currently in this Zygote's pool.
158  */
159 static std::atomic_uint32_t gUsapPoolCount = 0;
160 
161 /**
162  * Event file descriptor used to communicate reaped USAPs to the
163  * ZygoteServer.
164  */
165 static int gUsapPoolEventFD = -1;
166 
167 /**
168  * The socket file descriptor used to send notifications to the
169  * system_server.
170  */
171 static int gSystemServerSocketFd = -1;
172 
/** Octal permission bits rwxr-x--x. Not referenced in this part of the file. */
static constexpr int DEFAULT_DATA_DIR_PERMISSION = 0751;

/** Selects bits 32..63 of a 64-bit value. */
static constexpr const uint64_t UPPER_HALF_WORD_MASK = 0xFFFF'FFFF'0000'0000;
/** Selects bits 0..31 of a 64-bit value. */
static constexpr const uint64_t LOWER_HALF_WORD_MASK = 0x0000'0000'FFFF'FFFF;

// Profile directory paths.
// NOTE(review): "cur"/"ref" presumably mean current/reference profiles - confirm at the use sites.
static constexpr const char* kCurProfileDirPath = "/data/misc/profiles/cur";
static constexpr const char* kRefProfileDirPath = "/data/misc/profiles/ref";
180 
181 /**
182  * The maximum value that the gUSAPPoolSizeMax variable may take.  This value
183  * is a mirror of ZygoteServer.USAP_POOL_SIZE_MAX_LIMIT
184  */
185 static constexpr int USAP_POOL_SIZE_MAX_LIMIT = 100;
186 
187 /** The numeric value for the maximum priority a process may possess. */
188 static constexpr int PROCESS_PRIORITY_MAX = -20;
189 
190 /** The numeric value for the minimum priority a process may possess. */
191 static constexpr int PROCESS_PRIORITY_MIN = 19;
192 
193 /** The numeric value for the normal priority a process should have. */
194 static constexpr int PROCESS_PRIORITY_DEFAULT = 0;
195 
196 /** Exponential back off parameters for storage dir check. */
197 static constexpr unsigned int STORAGE_DIR_CHECK_RETRY_MULTIPLIER = 2;
198 static constexpr unsigned int STORAGE_DIR_CHECK_INIT_INTERVAL_US = 50;
199 static constexpr unsigned int STORAGE_DIR_CHECK_MAX_INTERVAL_US = 1000;
200 /**
201  * Lower bound time we allow storage dir check to sleep.
202  * If it exceeds 2s, PROC_START_TIMEOUT_MSG will kill the starting app anyway,
203  * so it's fine to assume max retries is 5 mins.
204  */
205 static constexpr int STORAGE_DIR_CHECK_TIMEOUT_US = 1000 * 1000 * 60 * 5;
206 
207 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn);
208 
/**
 * A helper class containing accounting information for USAPs.
 *
 * Each entry holds a (pid, read_pipe_fd) pair inside a single lock-free
 * atomic, because entries are accessed from the signal handler as well as
 * from regular code.  The pair {-1, -1} marks an unused entry.
 */
class UsapTableEntry {
 public:
  struct EntryStorage {
    int32_t pid;
    int32_t read_pipe_fd;

    /** @return True if either field differs from the one in |other|. */
    bool operator!=(const EntryStorage& other) const {
      return pid != other.pid || read_pipe_fd != other.read_pipe_fd;
    }
  };

 private:
  static constexpr EntryStorage INVALID_ENTRY_VALUE = {-1, -1};

  std::atomic<EntryStorage> mStorage;
  static_assert(decltype(mStorage)::is_always_lock_free);  // Accessed from signal handler.

 public:
  constexpr UsapTableEntry() : mStorage(INVALID_ENTRY_VALUE) {}

  /**
   * If the provided PID matches the one stored in this entry, the entry will
   * be invalidated and the associated file descriptor will be closed.  If the
   * PIDs don't match nothing will happen.
   *
   * @param pid The ID of the process whose entry we want to clear.
   * @return True if the entry was cleared by this call; false otherwise
   */
  bool ClearForPID(int32_t pid) {
    EntryStorage storage = mStorage.load();

    if (storage.pid != pid) {
      return false;
    }

    /*
     * There are three possible outcomes from this compare-and-exchange:
     *   1) It succeeds, in which case we close the FD
     *   2) It fails and the new value is INVALID_ENTRY_VALUE, in which case
     *      the entry has already been cleared.
     *   3) It fails and the new value isn't INVALID_ENTRY_VALUE, in which
     *      case the entry has already been cleared and re-used.
     *
     * In all three cases the goal of the caller has been met, but only in
     * the first case do we need to decrement the pool count.
     */
    if (mStorage.compare_exchange_strong(storage, INVALID_ENTRY_VALUE)) {
      close(storage.read_pipe_fd);
      return true;
    }

    return false;
  }

  /**
   * Invalidates the entry and, if it was in use, closes its file descriptor.
   *
   * The previous value is taken with one atomic exchange, so the descriptor
   * is closed by exactly one party even when ClearForPID() runs concurrently
   * (for instance from the signal handler) on the same entry.
   */
  void Clear() {
    const EntryStorage previous = mStorage.exchange(INVALID_ENTRY_VALUE);

    if (previous != INVALID_ENTRY_VALUE) {
      close(previous.read_pipe_fd);
    }
  }

  /**
   * Marks the entry as unused WITHOUT closing the stored file descriptor.
   */
  void Invalidate() {
    mStorage.store(INVALID_ENTRY_VALUE);
  }

  /**
   * @return A copy of the data stored in this entry, or std::nullopt if the
   * entry is currently unused.
   */
  std::optional<EntryStorage> GetValues() const {
    const EntryStorage storage = mStorage.load();

    if (storage != INVALID_ENTRY_VALUE) {
      return storage;
    }

    return std::nullopt;
  }

  /**
   * Sets the entry to the given values if it is currently invalid.
   *
   * @param pid  The process ID for the new entry.
   * @param read_pipe_fd  The read end of the USAP control pipe for this
   * process.
   * @return True if the entry was set; false otherwise.
   */
  bool SetIfInvalid(int32_t pid, int32_t read_pipe_fd) {
    const EntryStorage new_value_storage = {pid, read_pipe_fd};
    EntryStorage expected = INVALID_ENTRY_VALUE;

    return mStorage.compare_exchange_strong(expected, new_value_storage);
  }
};
312 
313 /**
314  * A table containing information about the USAPs currently in the pool.
315  *
316  * Multiple threads may be attempting to modify the table, either from the
317  * signal handler or from the ZygoteServer poll loop.  Atomic loads/stores in
318  * the USAPTableEntry class prevent data races during these concurrent
319  * operations.
320  */
321 static std::array<UsapTableEntry, USAP_POOL_SIZE_MAX_LIMIT> gUsapTable;
322 
323 /**
324  * The list of open zygote file descriptors.
325  */
326 static FileDescriptorTable* gOpenFdTable = nullptr;
327 
// Must match values in com.android.internal.os.Zygote.
// The values should be consistent with IVold.aidl
//
// Storage mount modes accepted by MountEmulatedStorage(), which rejects any
// value outside [0, MOUNT_EXTERNAL_COUNT).
enum MountExternalKind {
    // No storage is made visible.
    MOUNT_EXTERNAL_NONE = 0,
    MOUNT_EXTERNAL_DEFAULT = 1,
    MOUNT_EXTERNAL_INSTALLER = 2,
    // /mnt/pass_through/<user id> is bind mounted onto /storage.
    MOUNT_EXTERNAL_PASS_THROUGH = 3,
    MOUNT_EXTERNAL_ANDROID_WRITABLE = 4,
    // Number of modes above; used as the exclusive upper bound when validating.
    MOUNT_EXTERNAL_COUNT = 5
};
338 
// Must match values in com.android.internal.os.Zygote.
//
// Bit flags.  Two of the groups below are 2-bit fields rather than single
// bits: compare (flags & *_MASK) against the *_LEVEL_* values.
enum RuntimeFlags : uint32_t {
    DEBUG_ENABLE_JDWP = 1,
    PROFILE_FROM_SHELL = 1 << 15,
    // Memory tagging level: 2-bit field occupying bits 19-20.
    MEMORY_TAG_LEVEL_MASK = (1 << 19) | (1 << 20),
    MEMORY_TAG_LEVEL_TBI = 1 << 19,
    MEMORY_TAG_LEVEL_ASYNC = 2 << 19,
    MEMORY_TAG_LEVEL_SYNC = 3 << 19,
    // GWP-ASan level: 2-bit field occupying bits 21-22.
    GWP_ASAN_LEVEL_MASK = (1 << 21) | (1 << 22),
    GWP_ASAN_LEVEL_NEVER = 0 << 21,
    GWP_ASAN_LEVEL_LOTTERY = 1 << 21,
    GWP_ASAN_LEVEL_ALWAYS = 2 << 21,
    NATIVE_HEAP_ZERO_INIT_ENABLED = 1 << 23,
    PROFILEABLE = 1 << 24,
};
354 
// Type tag placed in the header of messages that the zygote sends to
// system_server on its own initiative (see sendSigChildStatus()).
enum UnsolicitedZygoteMessageTypes : uint32_t {
    UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED = 0,
    // The payload describes a child reaped by the SIGCHLD handler.
    UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD = 1,
};
359 
// Datagram sent as-is (raw bytes, sizeof(struct)) to system_server by
// sendSigChildStatus() for each child reaped by the SIGCHLD handler.
// NOTE(review): the receiver presumably parses this exact layout - confirm
// before reordering or resizing any field.
struct UnsolicitedZygoteMessageSigChld {
    struct {
        // Set to UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD by the sender.
        UnsolicitedZygoteMessageTypes type;
    } header;
    struct {
        // PID returned by waitpid().
        pid_t pid;
        // UID taken from the siginfo_t delivered with the signal.
        uid_t uid;
        // Raw wait status as filled in by waitpid().
        int status;
    } payload;
};
370 
// Keep sync with services/core/java/com/android/server/am/ProcessList.java
//
// Destination address of the datagrams sent by sendSigChildStatus().
static constexpr struct sockaddr_un kSystemServerSockAddr =
        {.sun_family = AF_LOCAL, .sun_path = "/data/system/unsolzygotesocket"};
374 
375 // Forward declaration so we don't have to move the signal handler.
376 static bool RemoveUsapTableEntry(pid_t usap_pid);
377 
RuntimeAbort(JNIEnv * env,int line,const char * msg)378 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
379   std::ostringstream oss;
380   oss << __FILE__ << ":" << line << ": " << msg;
381   env->FatalError(oss.str().c_str());
382 }
383 
384 // Create the socket which is going to be used to send unsolicited message
385 // to system_server, the socket will be closed post forking a child process.
386 // It's expected to be called at each zygote's initialization.
initUnsolSocketToSystemServer()387 static void initUnsolSocketToSystemServer() {
388     gSystemServerSocketFd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_NONBLOCK, 0);
389     if (gSystemServerSocketFd >= 0) {
390         ALOGV("Zygote:systemServerSocketFD = %d", gSystemServerSocketFd);
391     } else {
392         ALOGE("Unable to create socket file descriptor to connect to system_server");
393     }
394 }
395 
sendSigChildStatus(const pid_t pid,const uid_t uid,const int status)396 static void sendSigChildStatus(const pid_t pid, const uid_t uid, const int status) {
397     int socketFd = gSystemServerSocketFd;
398     if (socketFd >= 0) {
399         // fill the message buffer
400         struct UnsolicitedZygoteMessageSigChld data =
401                 {.header = {.type = UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD},
402                  .payload = {.pid = pid, .uid = uid, .status = status}};
403         if (TEMP_FAILURE_RETRY(
404                     sendto(socketFd, &data, sizeof(data), 0,
405                            reinterpret_cast<const struct sockaddr*>(&kSystemServerSockAddr),
406                            sizeof(kSystemServerSockAddr))) == -1) {
407             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
408                                   "Zygote failed to write to system_server FD: %s",
409                                   strerror(errno));
410         }
411     }
412 }
413 
414 // This signal handler is for zygote mode, since the zygote must reap its children
SigChldHandler(int,siginfo_t * info,void *)415 static void SigChldHandler(int /*signal_number*/, siginfo_t* info, void* /*ucontext*/) {
416     pid_t pid;
417     int status;
418     int64_t usaps_removed = 0;
419 
420     // It's necessary to save and restore the errno during this function.
421     // Since errno is stored per thread, changing it here modifies the errno
422     // on the thread on which this signal handler executes. If a signal occurs
423     // between a call and an errno check, it's possible to get the errno set
424     // here.
425     // See b/23572286 for extra information.
426     int saved_errno = errno;
427 
428     while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
429         // Notify system_server that we received a SIGCHLD
430         sendSigChildStatus(pid, info->si_uid, status);
431         // Log process-death status that we care about.
432         if (WIFEXITED(status)) {
433             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG, "Process %d exited cleanly (%d)", pid,
434                                   WEXITSTATUS(status));
435 
436             // Check to see if the PID is in the USAP pool and remove it if it is.
437             if (RemoveUsapTableEntry(pid)) {
438                 ++usaps_removed;
439             }
440         } else if (WIFSIGNALED(status)) {
441             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG,
442                                   "Process %d exited due to signal %d (%s)%s", pid,
443                                   WTERMSIG(status), strsignal(WTERMSIG(status)),
444                                   WCOREDUMP(status) ? "; core dumped" : "");
445 
446             // If the process exited due to a signal other than SIGTERM, check to see
447             // if the PID is in the USAP pool and remove it if it is.  If the process
448             // was closed by the Zygote using SIGTERM then the USAP pool entry will
449             // have already been removed (see nativeEmptyUsapPool()).
450             if (WTERMSIG(status) != SIGTERM && RemoveUsapTableEntry(pid)) {
451                 ++usaps_removed;
452             }
453         }
454 
455         // If the just-crashed process is the system_server, bring down zygote
456         // so that it is restarted by init and system server will be restarted
457         // from there.
458         if (pid == gSystemServerPid) {
459             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
460                                   "Exit zygote because system server (pid %d) has terminated", pid);
461             kill(getpid(), SIGKILL);
462         }
463     }
464 
465     // Note that we shouldn't consider ECHILD an error because
466     // the secondary zygote might have no children left to wait for.
467     if (pid < 0 && errno != ECHILD) {
468         async_safe_format_log(ANDROID_LOG_WARN, LOG_TAG, "Zygote SIGCHLD error in waitpid: %s",
469                               strerror(errno));
470     }
471 
472     if (usaps_removed > 0) {
473         if (TEMP_FAILURE_RETRY(write(gUsapPoolEventFD, &usaps_removed, sizeof(usaps_removed))) ==
474             -1) {
475             // If this write fails something went terribly wrong.  We will now kill
476             // the zygote and let the system bring it back up.
477             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
478                                   "Zygote failed to write to USAP pool event FD: %s",
479                                   strerror(errno));
480             kill(getpid(), SIGKILL);
481         }
482     }
483 
484     errno = saved_errno;
485 }
486 
487 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
488 // configured very late, because earlier in the runtime we may fork() and
489 // exec() other processes, and we want to waitpid() for those rather than
490 // have them be harvested immediately.
491 //
492 // Ignore SIGHUP because all processes forked by the zygote are in the same
493 // process group as the zygote and we don't want to be notified if we become
494 // an orphaned group and have one or more stopped processes. This is not a
495 // theoretical concern :
496 // - we can become an orphaned group if one of our direct descendants forks
497 //   and is subsequently killed before its children.
498 // - crash_dump routinely STOPs the process it's tracing.
499 //
500 // See issues b/71965619 and b/25567761 for further details.
501 //
502 // This ends up being called repeatedly before each fork(), but there's
503 // no real harm in that.
SetSignalHandlers()504 static void SetSignalHandlers() {
505     struct sigaction sig_chld = {.sa_flags = SA_SIGINFO, .sa_sigaction = SigChldHandler};
506 
507     if (sigaction(SIGCHLD, &sig_chld, nullptr) < 0) {
508         ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
509     }
510 
511   struct sigaction sig_hup = {};
512   sig_hup.sa_handler = SIG_IGN;
513   if (sigaction(SIGHUP, &sig_hup, nullptr) < 0) {
514     ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
515   }
516 }
517 
518 // Sets the SIGCHLD handler back to default behavior in zygote children.
UnsetChldSignalHandler()519 static void UnsetChldSignalHandler() {
520   struct sigaction sa;
521   memset(&sa, 0, sizeof(sa));
522   sa.sa_handler = SIG_DFL;
523 
524   if (sigaction(SIGCHLD, &sa, nullptr) < 0) {
525     ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
526   }
527 }
528 
529 // Calls POSIX setgroups() using the int[] object as an argument.
530 // A nullptr argument is tolerated.
SetGids(JNIEnv * env,jintArray managed_gids,jboolean is_child_zygote,fail_fn_t fail_fn)531 static void SetGids(JNIEnv* env, jintArray managed_gids, jboolean is_child_zygote,
532                     fail_fn_t fail_fn) {
533   if (managed_gids == nullptr) {
534     if (is_child_zygote) {
535       // For child zygotes like webview and app zygote, we want to clear out
536       // any supplemental groups the parent zygote had.
537       if (setgroups(0, NULL) == -1) {
538         fail_fn(CREATE_ERROR("Failed to remove supplementary groups for child zygote"));
539       }
540     }
541     return;
542   }
543 
544   ScopedIntArrayRO gids(env, managed_gids);
545   if (gids.get() == nullptr) {
546     fail_fn(CREATE_ERROR("Getting gids int array failed"));
547   }
548 
549   if (setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])) == -1) {
550     fail_fn(CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size()));
551   }
552 }
553 
ensureInAppMountNamespace(fail_fn_t fail_fn)554 static void ensureInAppMountNamespace(fail_fn_t fail_fn) {
555   if (gInAppMountNamespace) {
556     // In app mount namespace already
557     return;
558   }
559   if (unshare(CLONE_NEWNS) == -1) {
560     fail_fn(CREATE_ERROR("Failed to unshare(): %s", strerror(errno)));
561   }
562   gInAppMountNamespace = true;
563 }
564 
565 // Sets the resource limits via setrlimit(2) for the values in the
566 // two-dimensional array of integers that's passed in. The second dimension
567 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). nullptr is
568 // treated as an empty array.
SetRLimits(JNIEnv * env,jobjectArray managed_rlimits,fail_fn_t fail_fn)569 static void SetRLimits(JNIEnv* env, jobjectArray managed_rlimits, fail_fn_t fail_fn) {
570   if (managed_rlimits == nullptr) {
571     return;
572   }
573 
574   rlimit rlim;
575   memset(&rlim, 0, sizeof(rlim));
576 
577   for (int i = 0; i < env->GetArrayLength(managed_rlimits); ++i) {
578     ScopedLocalRef<jobject>
579         managed_rlimit_object(env, env->GetObjectArrayElement(managed_rlimits, i));
580     ScopedIntArrayRO rlimit_handle(env, reinterpret_cast<jintArray>(managed_rlimit_object.get()));
581 
582     if (rlimit_handle.size() != 3) {
583       fail_fn(CREATE_ERROR("rlimits array must have a second dimension of size 3"));
584     }
585 
586     rlim.rlim_cur = rlimit_handle[1];
587     rlim.rlim_max = rlimit_handle[2];
588 
589     if (setrlimit(rlimit_handle[0], &rlim) == -1) {
590       fail_fn(CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed",
591                            rlimit_handle[0], rlim.rlim_cur, rlim.rlim_max));
592     }
593   }
594 }
595 
EnableDebugger()596 static void EnableDebugger() {
597   // To let a non-privileged gdbserver attach to this
598   // process, we must set our dumpable flag.
599   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
600     ALOGE("prctl(PR_SET_DUMPABLE) failed");
601   }
602 
603   // A non-privileged native debugger should be able to attach to the debuggable app, even if Yama
604   // is enabled (see kernel/Documentation/security/Yama.txt).
605   if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
606     // if Yama is off prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
607     // case since it's expected behaviour.
608     if (errno != EINVAL) {
609       ALOGE("prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed");
610     }
611   }
612 
613   // Set the core dump size to zero unless wanted (see also coredump_setup in build/envsetup.sh).
614   if (!GetBoolProperty("persist.zygote.core_dump", false)) {
615     // Set the soft limit on core dump size to 0 without changing the hard limit.
616     rlimit rl;
617     if (getrlimit(RLIMIT_CORE, &rl) == -1) {
618       ALOGE("getrlimit(RLIMIT_CORE) failed");
619     } else {
620       rl.rlim_cur = 0;
621       if (setrlimit(RLIMIT_CORE, &rl) == -1) {
622         ALOGE("setrlimit(RLIMIT_CORE) failed");
623       }
624     }
625   }
626 }
627 
// Allocator setup performed by a child process: marks the process as a zygote
// child and sets the allocator decay time.  The return values of both calls
// are ignored.
static void PreApplicationInit() {
  // The child process sets this to indicate it's not the zygote.
  android_mallopt(M_SET_ZYGOTE_CHILD, nullptr, 0);

  // Set the jemalloc decay time to 1.
  mallopt(M_DECAY_TIME, 1);
}
635 
SetUpSeccompFilter(uid_t uid,bool is_child_zygote)636 static void SetUpSeccompFilter(uid_t uid, bool is_child_zygote) {
637   if (!gIsSecurityEnforced) {
638     ALOGI("seccomp disabled by setenforce 0");
639     return;
640   }
641 
642   // Apply system or app filter based on uid.
643   if (uid >= AID_APP_START) {
644     if (is_child_zygote) {
645       set_app_zygote_seccomp_filter();
646     } else {
647       set_app_seccomp_filter();
648     }
649   } else {
650     set_system_seccomp_filter();
651   }
652 }
653 
EnableKeepCapabilities(fail_fn_t fail_fn)654 static void EnableKeepCapabilities(fail_fn_t fail_fn) {
655   if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) {
656     fail_fn(CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno)));
657   }
658 }
659 
DropCapabilitiesBoundingSet(fail_fn_t fail_fn)660 static void DropCapabilitiesBoundingSet(fail_fn_t fail_fn) {
661   for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {;
662     if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) == -1) {
663       if (errno == EINVAL) {
664         ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
665               "your kernel is compiled with file capabilities support");
666       } else {
667         fail_fn(CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno)));
668       }
669     }
670   }
671 }
672 
SetInheritable(uint64_t inheritable,fail_fn_t fail_fn)673 static void SetInheritable(uint64_t inheritable, fail_fn_t fail_fn) {
674   __user_cap_header_struct capheader;
675   memset(&capheader, 0, sizeof(capheader));
676   capheader.version = _LINUX_CAPABILITY_VERSION_3;
677   capheader.pid = 0;
678 
679   __user_cap_data_struct capdata[2];
680   if (capget(&capheader, &capdata[0]) == -1) {
681     fail_fn(CREATE_ERROR("capget failed: %s", strerror(errno)));
682   }
683 
684   capdata[0].inheritable = inheritable;
685   capdata[1].inheritable = inheritable >> 32;
686 
687   if (capset(&capheader, &capdata[0]) == -1) {
688     fail_fn(CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno)));
689   }
690 }
691 
SetCapabilities(uint64_t permitted,uint64_t effective,uint64_t inheritable,fail_fn_t fail_fn)692 static void SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
693                             fail_fn_t fail_fn) {
694   __user_cap_header_struct capheader;
695   memset(&capheader, 0, sizeof(capheader));
696   capheader.version = _LINUX_CAPABILITY_VERSION_3;
697   capheader.pid = 0;
698 
699   __user_cap_data_struct capdata[2];
700   memset(&capdata, 0, sizeof(capdata));
701   capdata[0].effective = effective;
702   capdata[1].effective = effective >> 32;
703   capdata[0].permitted = permitted;
704   capdata[1].permitted = permitted >> 32;
705   capdata[0].inheritable = inheritable;
706   capdata[1].inheritable = inheritable >> 32;
707 
708   if (capset(&capheader, &capdata[0]) == -1) {
709     fail_fn(CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
710                          "failed: %s", permitted, effective, inheritable, strerror(errno)));
711   }
712 }
713 
SetSchedulerPolicy(fail_fn_t fail_fn,bool is_top_app)714 static void SetSchedulerPolicy(fail_fn_t fail_fn, bool is_top_app) {
715   SchedPolicy policy = is_top_app ? SP_TOP_APP : SP_DEFAULT;
716 
717   if (is_top_app && cpusets_enabled()) {
718     errno = -set_cpuset_policy(0, policy);
719     if (errno != 0) {
720       fail_fn(CREATE_ERROR("set_cpuset_policy(0, %d) failed: %s", policy, strerror(errno)));
721     }
722   }
723 
724   errno = -set_sched_policy(0, policy);
725   if (errno != 0) {
726     fail_fn(CREATE_ERROR("set_sched_policy(0, %d) failed: %s", policy, strerror(errno)));
727   }
728 
729   // We are going to lose the permission to set scheduler policy during the specialization, so make
730   // sure that we don't cache the fd of cgroup path that may cause sepolicy violation by writing
731   // value to the cached fd directly when creating new thread.
732   DropTaskProfilesResourceCaching();
733 }
734 
UnmountTree(const char * path)735 static int UnmountTree(const char* path) {
736   ATRACE_CALL();
737 
738   size_t path_len = strlen(path);
739 
740   FILE* fp = setmntent("/proc/mounts", "r");
741   if (fp == nullptr) {
742     ALOGE("Error opening /proc/mounts: %s", strerror(errno));
743     return -errno;
744   }
745 
746   // Some volumes can be stacked on each other, so force unmount in
747   // reverse order to give us the best chance of success.
748   std::list<std::string> to_unmount;
749   mntent* mentry;
750   while ((mentry = getmntent(fp)) != nullptr) {
751     if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
752       to_unmount.push_front(std::string(mentry->mnt_dir));
753     }
754   }
755   endmntent(fp);
756 
757   for (const auto& path : to_unmount) {
758     if (umount2(path.c_str(), MNT_DETACH)) {
759       ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
760     }
761   }
762   return 0;
763 }
764 
PrepareDir(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)765 static void PrepareDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
766                       fail_fn_t fail_fn) {
767   if (fs_prepare_dir(dir.c_str(), mode, uid, gid) != 0) {
768     fail_fn(CREATE_ERROR("fs_prepare_dir failed on %s: %s",
769                          dir.c_str(), strerror(errno)));
770   }
771 }
772 
PrepareDirIfNotPresent(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)773 static void PrepareDirIfNotPresent(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
774                       fail_fn_t fail_fn) {
775   struct stat sb;
776   if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &sb)) != -1) {
777     // Directory exists already
778     return;
779   }
780   PrepareDir(dir, mode, uid, gid, fail_fn);
781 }
782 
// Recursively bind mounts |source_dir| onto |target_dir| (MS_BIND | MS_REC).
// Returns true on success and false if mount() failed, in which case errno is
// left as set by mount().
static bool BindMount(const std::string& source_dir, const std::string& target_dir) {
  const int rc = TEMP_FAILURE_RETRY(
      mount(source_dir.c_str(), target_dir.c_str(), nullptr, MS_BIND | MS_REC, nullptr));
  return rc != -1;
}
787 
BindMount(const std::string & source_dir,const std::string & target_dir,fail_fn_t fail_fn)788 static void BindMount(const std::string& source_dir, const std::string& target_dir,
789                       fail_fn_t fail_fn) {
790   if (!BindMount(source_dir, target_dir)) {
791     fail_fn(CREATE_ERROR("Failed to mount %s to %s: %s",
792                          source_dir.c_str(), target_dir.c_str(), strerror(errno)));
793   }
794 }
795 
MountAppDataTmpFs(const std::string & target_dir,fail_fn_t fail_fn)796 static void MountAppDataTmpFs(const std::string& target_dir,
797                       fail_fn_t fail_fn) {
798   if (TEMP_FAILURE_RETRY(mount("tmpfs", target_dir.c_str(), "tmpfs",
799                                MS_NOSUID | MS_NODEV | MS_NOEXEC, "uid=0,gid=0,mode=0751")) == -1) {
800     fail_fn(CREATE_ERROR("Failed to mount tmpfs to %s: %s",
801                          target_dir.c_str(), strerror(errno)));
802   }
803 }
804 
805 // Create a private mount namespace and bind mount appropriate emulated
806 // storage for the given user.
MountEmulatedStorage(uid_t uid,jint mount_mode,bool force_mount_namespace,fail_fn_t fail_fn)807 static void MountEmulatedStorage(uid_t uid, jint mount_mode,
808         bool force_mount_namespace,
809         fail_fn_t fail_fn) {
810   // See storage config details at http://source.android.com/tech/storage/
811   ATRACE_CALL();
812 
813   if (mount_mode < 0 || mount_mode >= MOUNT_EXTERNAL_COUNT) {
814     fail_fn(CREATE_ERROR("Unknown mount_mode: %d", mount_mode));
815   }
816 
817   if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
818     // Valid default of no storage visible
819     return;
820   }
821 
822   // Create a second private mount namespace for our process
823   ensureInAppMountNamespace(fail_fn);
824 
825   // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
826   if (mount_mode == MOUNT_EXTERNAL_NONE) {
827     return;
828   }
829 
830   const userid_t user_id = multiuser_get_user_id(uid);
831   const std::string user_source = StringPrintf("/mnt/user/%d", user_id);
832   // Shell is neither AID_ROOT nor AID_EVERYBODY. Since it equally needs 'execute' access to
833   // /mnt/user/0 to 'adb shell ls /sdcard' for instance, we set the uid bit of /mnt/user/0 to
834   // AID_SHELL. This gives shell access along with apps running as group everybody (user 0 apps)
835   // These bits should be consistent with what is set in vold in
836   // Utils#MountUserFuse on FUSE volume mount
837   PrepareDir(user_source, 0710, user_id ? AID_ROOT : AID_SHELL,
838              multiuser_get_uid(user_id, AID_EVERYBODY), fail_fn);
839 
840   bool isAppDataIsolationEnabled = GetBoolProperty(kVoldAppDataIsolation, false);
841 
842   if (mount_mode == MOUNT_EXTERNAL_PASS_THROUGH) {
843       const std::string pass_through_source = StringPrintf("/mnt/pass_through/%d", user_id);
844       PrepareDir(pass_through_source, 0710, AID_ROOT, AID_MEDIA_RW, fail_fn);
845       BindMount(pass_through_source, "/storage", fail_fn);
846   } else if (mount_mode == MOUNT_EXTERNAL_INSTALLER) {
847       const std::string installer_source = StringPrintf("/mnt/installer/%d", user_id);
848       BindMount(installer_source, "/storage", fail_fn);
849   } else if (isAppDataIsolationEnabled && mount_mode == MOUNT_EXTERNAL_ANDROID_WRITABLE) {
850       const std::string writable_source = StringPrintf("/mnt/androidwritable/%d", user_id);
851       BindMount(writable_source, "/storage", fail_fn);
852   } else {
853       BindMount(user_source, "/storage", fail_fn);
854   }
855 }
856 
857 // Utility to close down the Zygote socket file descriptors while
858 // the child is still running as root with Zygote's privileges.  Each
859 // descriptor (if any) is closed via dup3(), replacing it with a valid
860 // (open) descriptor to /dev/null.
861 
DetachDescriptors(JNIEnv * env,const std::vector<int> & fds_to_close,fail_fn_t fail_fn)862 static void DetachDescriptors(JNIEnv* env,
863                               const std::vector<int>& fds_to_close,
864                               fail_fn_t fail_fn) {
865 
866   if (fds_to_close.size() > 0) {
867     android::base::unique_fd devnull_fd(open("/dev/null", O_RDWR | O_CLOEXEC));
868     if (devnull_fd == -1) {
869       fail_fn(std::string("Failed to open /dev/null: ").append(strerror(errno)));
870     }
871 
872     for (int fd : fds_to_close) {
873       ALOGV("Switching descriptor %d to /dev/null", fd);
874       if (TEMP_FAILURE_RETRY(dup3(devnull_fd, fd, O_CLOEXEC)) == -1) {
875         fail_fn(StringPrintf("Failed dup3() on descriptor %d: %s", fd, strerror(errno)));
876       }
877     }
878   }
879 }
880 
SetThreadName(const std::string & thread_name)881 void SetThreadName(const std::string& thread_name) {
882   bool hasAt = false;
883   bool hasDot = false;
884 
885   for (const char str_el : thread_name) {
886     if (str_el == '.') {
887       hasDot = true;
888     } else if (str_el == '@') {
889       hasAt = true;
890     }
891   }
892 
893   const char* name_start_ptr = thread_name.c_str();
894   if (thread_name.length() >= MAX_NAME_LENGTH && !hasAt && hasDot) {
895     name_start_ptr += thread_name.length() - MAX_NAME_LENGTH;
896   }
897 
898   // pthread_setname_np fails rather than truncating long strings.
899   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
900   strlcpy(buf, name_start_ptr, sizeof(buf));
901   errno = pthread_setname_np(pthread_self(), buf);
902   if (errno != 0) {
903     ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
904   }
905   // Update base::logging default tag.
906   android::base::SetDefaultTag(buf);
907 }
908 
909 /**
910  * A helper method for converting managed strings to native strings.  A fatal
911  * error is generated if a problem is encountered in extracting a non-null
912  * string.
913  *
914  * @param env  Managed runtime environment
915  * @param process_name  A native representation of the process name
916  * @param managed_process_name  A managed representation of the process name
917  * @param managed_string  The managed string to extract
918  *
919  * @return An empty option if the managed string is null.  A optional-wrapped
920  * string otherwise.
921  */
ExtractJString(JNIEnv * env,const char * process_name,jstring managed_process_name,jstring managed_string)922 static std::optional<std::string> ExtractJString(JNIEnv* env,
923                                                  const char* process_name,
924                                                  jstring managed_process_name,
925                                                  jstring managed_string) {
926   if (managed_string == nullptr) {
927     return std::nullopt;
928   } else {
929     ScopedUtfChars scoped_string_chars(env, managed_string);
930 
931     if (scoped_string_chars.c_str() != nullptr) {
932       return std::optional<std::string>(scoped_string_chars.c_str());
933     } else {
934       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JString.");
935     }
936   }
937 }
938 
939 /**
940  * A helper method for converting managed string arrays to native vectors.  A
941  * fatal error is generated if a problem is encountered in extracting a non-null array.
942  *
943  * @param env  Managed runtime environment
944  * @param process_name  A native representation of the process name
945  * @param managed_process_name  A managed representation of the process name
946  * @param managed_array  The managed integer array to extract
947  *
948  * @return An empty option if the managed array is null.  A optional-wrapped
949  * vector otherwise.
950  */
ExtractJIntArray(JNIEnv * env,const char * process_name,jstring managed_process_name,jintArray managed_array)951 static std::optional<std::vector<int>> ExtractJIntArray(JNIEnv* env,
952                                                         const char* process_name,
953                                                         jstring managed_process_name,
954                                                         jintArray managed_array) {
955   if (managed_array == nullptr) {
956     return std::nullopt;
957   } else {
958     ScopedIntArrayRO managed_array_handle(env, managed_array);
959 
960     if (managed_array_handle.get() != nullptr) {
961       std::vector<int> native_array;
962       native_array.reserve(managed_array_handle.size());
963 
964       for (size_t array_index = 0; array_index < managed_array_handle.size(); ++array_index) {
965         native_array.push_back(managed_array_handle[array_index]);
966       }
967 
968       return std::move(native_array);
969 
970     } else {
971       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JIntArray.");
972     }
973   }
974 }
975 
976 /**
977  * A utility function for blocking signals.
978  *
979  * @param signum  Signal number to block
980  * @param fail_fn  Fatal error reporting function
981  *
982  * @see ZygoteFailure
983  */
BlockSignal(int signum,fail_fn_t fail_fn)984 static void BlockSignal(int signum, fail_fn_t fail_fn) {
985   sigset_t sigs;
986   sigemptyset(&sigs);
987   sigaddset(&sigs, signum);
988 
989   if (sigprocmask(SIG_BLOCK, &sigs, nullptr) == -1) {
990     fail_fn(CREATE_ERROR("Failed to block signal %s: %s", strsignal(signum), strerror(errno)));
991   }
992 }
993 
994 
995 /**
996  * A utility function for unblocking signals.
997  *
998  * @param signum  Signal number to unblock
999  * @param fail_fn  Fatal error reporting function
1000  *
1001  * @see ZygoteFailure
1002  */
UnblockSignal(int signum,fail_fn_t fail_fn)1003 static void UnblockSignal(int signum, fail_fn_t fail_fn) {
1004   sigset_t sigs;
1005   sigemptyset(&sigs);
1006   sigaddset(&sigs, signum);
1007 
1008   if (sigprocmask(SIG_UNBLOCK, &sigs, nullptr) == -1) {
1009     fail_fn(CREATE_ERROR("Failed to un-block signal %s: %s", strsignal(signum), strerror(errno)));
1010   }
1011 }
1012 
ClearUsapTable()1013 static void ClearUsapTable() {
1014   for (UsapTableEntry& entry : gUsapTable) {
1015     entry.Clear();
1016   }
1017 
1018   gUsapPoolCount = 0;
1019 }
1020 
1021 // Create an app data directory over tmpfs overlayed CE / DE storage, and bind mount it
1022 // from the actual app data directory in data mirror.
createAndMountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn,bool call_fail_fn)1023 static bool createAndMountAppData(std::string_view package_name,
1024     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1025     std::string_view actual_data_path, fail_fn_t fail_fn, bool call_fail_fn) {
1026 
1027   char mirrorAppDataPath[PATH_MAX];
1028   char actualAppDataPath[PATH_MAX];
1029   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1030       mirror_pkg_dir_name.data());
1031   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1032 
1033   PrepareDir(actualAppDataPath, 0700, AID_ROOT, AID_ROOT, fail_fn);
1034 
1035   // Bind mount from original app data directory in mirror.
1036   if (call_fail_fn) {
1037     BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1038   } else if(!BindMount(mirrorAppDataPath, actualAppDataPath)) {
1039     ALOGW("Failed to mount %s to %s: %s",
1040           mirrorAppDataPath, actualAppDataPath, strerror(errno));
1041     return false;
1042   }
1043   return true;
1044 }
1045 
1046 // There is an app data directory over tmpfs overlaid CE / DE storage
1047 // bind mount it from the actual app data directory in data mirror.
mountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn)1048 static void mountAppData(std::string_view package_name,
1049     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1050     std::string_view actual_data_path, fail_fn_t fail_fn) {
1051 
1052   char mirrorAppDataPath[PATH_MAX];
1053   char actualAppDataPath[PATH_MAX];
1054   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1055       mirror_pkg_dir_name.data());
1056   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1057 
1058   // Bind mount from original app data directory in mirror.
1059   BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1060 }
1061 
1062 // Get the directory name stored in /data/data. If device is unlocked it should be the same as
1063 // package name, otherwise it will be an encrypted name but with same inode number.
getAppDataDirName(std::string_view parent_path,std::string_view package_name,long long ce_data_inode,fail_fn_t fail_fn)1064 static std::string getAppDataDirName(std::string_view parent_path, std::string_view package_name,
1065       long long ce_data_inode, fail_fn_t fail_fn) {
1066   // Check if directory exists
1067   char tmpPath[PATH_MAX];
1068   snprintf(tmpPath, PATH_MAX, "%s/%s", parent_path.data(), package_name.data());
1069   struct stat s;
1070   int err = stat(tmpPath, &s);
1071   if (err == 0) {
1072     // Directory exists, so return the directory name
1073     return package_name.data();
1074   } else {
1075     if (errno != ENOENT) {
1076       fail_fn(CREATE_ERROR("Unexpected error in getAppDataDirName: %s", strerror(errno)));
1077       return nullptr;
1078     }
1079     {
1080       // Directory doesn't exist, try to search the name from inode
1081       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1082       if (dir == nullptr) {
1083         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1084       }
1085       struct dirent* ent;
1086       while ((ent = readdir(dir.get()))) {
1087         if (ent->d_ino == ce_data_inode) {
1088           return ent->d_name;
1089         }
1090       }
1091     }
1092 
1093     // Fallback due to b/145989852, ce_data_inode stored in package manager may be corrupted
1094     // if ino_t is 32 bits.
1095     ino_t fixed_ce_data_inode = 0;
1096     if ((ce_data_inode & UPPER_HALF_WORD_MASK) == UPPER_HALF_WORD_MASK) {
1097       fixed_ce_data_inode = ce_data_inode & LOWER_HALF_WORD_MASK;
1098     } else if ((ce_data_inode & LOWER_HALF_WORD_MASK) == LOWER_HALF_WORD_MASK) {
1099       fixed_ce_data_inode = ((ce_data_inode >> 32) & LOWER_HALF_WORD_MASK);
1100     }
1101     if (fixed_ce_data_inode != 0) {
1102       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1103       if (dir == nullptr) {
1104         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1105       }
1106       struct dirent* ent;
1107       while ((ent = readdir(dir.get()))) {
1108         if (ent->d_ino == fixed_ce_data_inode) {
1109           long long d_ino = ent->d_ino;
1110           ALOGW("Fallback success inode %lld -> %lld", ce_data_inode, d_ino);
1111           return ent->d_name;
1112         }
1113       }
1114     }
1115     // Fallback done
1116 
1117     fail_fn(CREATE_ERROR("Unable to find %s:%lld in %s", package_name.data(),
1118         ce_data_inode, parent_path.data()));
1119     return nullptr;
1120   }
1121 }
1122 
1123 // Isolate app's data directory, by mounting a tmpfs on CE DE storage,
1124 // and create and bind mount app data in related_packages.
isolateAppDataPerPackage(int userId,std::string_view package_name,std::string_view volume_uuid,long long ce_data_inode,std::string_view actualCePath,std::string_view actualDePath,fail_fn_t fail_fn)1125 static void isolateAppDataPerPackage(int userId, std::string_view package_name,
1126     std::string_view volume_uuid, long long ce_data_inode, std::string_view actualCePath,
1127     std::string_view actualDePath, fail_fn_t fail_fn) {
1128 
1129   char mirrorCePath[PATH_MAX];
1130   char mirrorDePath[PATH_MAX];
1131   char mirrorCeParent[PATH_MAX];
1132   snprintf(mirrorCeParent, PATH_MAX, "/data_mirror/data_ce/%s", volume_uuid.data());
1133   snprintf(mirrorCePath, PATH_MAX, "%s/%d", mirrorCeParent, userId);
1134   snprintf(mirrorDePath, PATH_MAX, "/data_mirror/data_de/%s/%d", volume_uuid.data(), userId);
1135 
1136   createAndMountAppData(package_name, package_name, mirrorDePath, actualDePath, fail_fn,
1137                         true /*call_fail_fn*/);
1138 
1139   std::string ce_data_path = getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1140   if (!createAndMountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn,
1141                              false /*call_fail_fn*/)) {
1142     // CE might unlocks and the name is decrypted
1143     // get the name and mount again
1144     ce_data_path=getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1145     mountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn);
1146   }
1147 }
1148 
1149 // Relabel directory
relabelDir(const char * path,const char * context,fail_fn_t fail_fn)1150 static void relabelDir(const char* path, const char* context, fail_fn_t fail_fn) {
1151   if (setfilecon(path, context) != 0) {
1152     fail_fn(CREATE_ERROR("Failed to setfilecon %s %s", path, strerror(errno)));
1153   }
1154 }
1155 
1156 // Relabel all directories under a path non-recursively.
relabelAllDirs(const char * path,const char * context,fail_fn_t fail_fn)1157 static void relabelAllDirs(const char* path, const char* context, fail_fn_t fail_fn) {
1158   DIR* dir = opendir(path);
1159   if (dir == nullptr) {
1160     fail_fn(CREATE_ERROR("Failed to opendir %s", path));
1161   }
1162   struct dirent* ent;
1163   while ((ent = readdir(dir))) {
1164     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1165     auto filePath = StringPrintf("%s/%s", path, ent->d_name);
1166     if (ent->d_type == DT_DIR) {
1167       relabelDir(filePath.c_str(), context, fail_fn);
1168     } else if (ent->d_type == DT_LNK) {
1169       if (lsetfilecon(filePath.c_str(), context) != 0) {
1170         fail_fn(CREATE_ERROR("Failed to lsetfilecon %s %s", filePath.c_str(), strerror(errno)));
1171       }
1172     } else {
1173       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, filePath.c_str()));
1174     }
1175   }
1176   closedir(dir);
1177 }
1178 
is_sdk_sandbox_uid(uid_t uid)1179 static bool is_sdk_sandbox_uid(uid_t uid) {
1180     appid_t appId = multiuser_get_app_id(uid);
1181     return appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END;
1182 }
1183 
1184 /**
1185  * Make other apps data directory not visible in CE, DE storage.
1186  *
1187  * Apps without app data isolation can detect if another app is installed on system,
1188  * by "touching" other apps data directory like /data/data/com.whatsapp, if it returns
1189  * "Permission denied" it means apps installed, otherwise it returns "File not found".
1190  * Traditional file permissions or SELinux can only block accessing those directories but
1191  * can't fix fingerprinting like this.
1192  * We fix it by "overlaying" data directory, and only relevant app data packages exists
1193  * in data directories.
1194  *
1195  * Steps:
1196  * 1). Collect a list of all related apps (apps with same uid and allowlisted apps) data info
1197  * (package name, data stored volume uuid, and inode number of its CE data directory)
1198  * 2). Mount tmpfs on /data/data, /data/user(_de) and /mnt/expand, so apps no longer
1199  * able to access apps data directly.
1200  * 3). For each related app, create its app data directory and bind mount the actual content
1201  * from apps data mirror directory. This works on both CE and DE storage, as DE storage
1202  * is always available even storage is FBE locked, while we use inode number to find
1203  * the encrypted DE directory in mirror so we can still bind mount it successfully.
1204  *
1205  * Example:
1206  * 0). Assuming com.android.foo CE data is stored in /data/data and no shared uid
1207  * 1). Mount a tmpfs on /data/data, /data/user, /data/user_de, /mnt/expand
1208  * List = ["com.android.foo", "null" (volume uuid "null"=default),
1209  * 123456 (inode number)]
1210  * 2). On DE storage, we create a directory /data/user_de/0/com.com.android.foo, and bind
1211  * mount (in the app's mount namespace) it from /data_mirror/data_de/0/com.android.foo.
1212  * 3). We do similar for CE storage. But in direct boot mode, as /data_mirror/data_ce/0/ is
1213  * encrypted, we can't find a directory with name com.android.foo on it, so we will
1214  * use the inode number to find the right directory instead, which that directory content will
1215  * be decrypted after storage is decrypted.
1216  *
1217  */
isolateAppData(JNIEnv * env,const std::vector<std::string> & merged_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1218 static void isolateAppData(JNIEnv* env, const std::vector<std::string>& merged_data_info_list,
1219     uid_t uid, const char* process_name,
1220     jstring managed_nice_name, fail_fn_t fail_fn) {
1221 
1222   const userid_t userId = multiuser_get_user_id(uid);
1223 
1224   int size = merged_data_info_list.size();
1225 
1226   // Mount tmpfs on all possible data directories, so app no longer see the original apps data.
1227   char internalCePath[PATH_MAX];
1228   char internalLegacyCePath[PATH_MAX];
1229   char internalDePath[PATH_MAX];
1230   char externalPrivateMountPath[PATH_MAX];
1231 
1232   snprintf(internalCePath, PATH_MAX, "/data/user");
1233   snprintf(internalLegacyCePath, PATH_MAX, "/data/data");
1234   snprintf(internalDePath, PATH_MAX, "/data/user_de");
1235   snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1236 
1237   char* dataDataContext = nullptr;
1238   if (getfilecon(internalDePath, &dataDataContext) < 0) {
1239     fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalDePath,
1240         strerror(errno)));
1241   }
1242 
1243   MountAppDataTmpFs(internalLegacyCePath, fail_fn);
1244   MountAppDataTmpFs(internalCePath, fail_fn);
1245   MountAppDataTmpFs(internalDePath, fail_fn);
1246 
1247   // Mount tmpfs on all external vols DE and CE storage
1248   DIR* dir = opendir(externalPrivateMountPath);
1249   if (dir == nullptr) {
1250     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1251   }
1252   struct dirent* ent;
1253   while ((ent = readdir(dir))) {
1254     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1255     if (ent->d_type != DT_DIR) {
1256       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1257     }
1258     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1259     auto cePath = StringPrintf("%s/user", volPath.c_str());
1260     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1261     // Wait until dir user is created.
1262     WaitUntilDirReady(cePath.c_str(), fail_fn);
1263     MountAppDataTmpFs(cePath.c_str(), fail_fn);
1264     // Wait until dir user_de is created.
1265     WaitUntilDirReady(dePath.c_str(), fail_fn);
1266     MountAppDataTmpFs(dePath.c_str(), fail_fn);
1267   }
1268   closedir(dir);
1269 
1270   // No bind mounting of app data should occur in the case of a sandbox process since SDK sandboxes
1271   // should not be able to read app data. Tmpfs was mounted however since a sandbox should not have
1272   // access to app data.
1273   if (!is_sdk_sandbox_uid(uid)) {
1274       // Prepare default dirs for user 0 as user 0 always exists.
1275       int result = symlink("/data/data", "/data/user/0");
1276       if (result != 0) {
1277           fail_fn(CREATE_ERROR("Failed to create symlink /data/user/0 %s", strerror(errno)));
1278       }
1279       PrepareDirIfNotPresent("/data/user_de/0", DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1280                              fail_fn);
1281 
1282       for (int i = 0; i < size; i += 3) {
1283           std::string const& packageName = merged_data_info_list[i];
1284           std::string const& volUuid = merged_data_info_list[i + 1];
1285           std::string const& inode = merged_data_info_list[i + 2];
1286 
1287           std::string::size_type sz;
1288           long long ceDataInode = std::stoll(inode, &sz);
1289 
1290           std::string actualCePath, actualDePath;
1291           if (volUuid.compare("null") != 0) {
1292               // Volume that is stored in /mnt/expand
1293               char volPath[PATH_MAX];
1294               char volCePath[PATH_MAX];
1295               char volDePath[PATH_MAX];
1296               char volCeUserPath[PATH_MAX];
1297               char volDeUserPath[PATH_MAX];
1298 
1299               snprintf(volPath, PATH_MAX, "/mnt/expand/%s", volUuid.c_str());
1300               snprintf(volCePath, PATH_MAX, "%s/user", volPath);
1301               snprintf(volDePath, PATH_MAX, "%s/user_de", volPath);
1302               snprintf(volCeUserPath, PATH_MAX, "%s/%d", volCePath, userId);
1303               snprintf(volDeUserPath, PATH_MAX, "%s/%d", volDePath, userId);
1304 
1305               PrepareDirIfNotPresent(volPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1306                                      fail_fn);
1307               PrepareDirIfNotPresent(volCePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1308                                      fail_fn);
1309               PrepareDirIfNotPresent(volDePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1310                                      fail_fn);
1311               PrepareDirIfNotPresent(volCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1312                                      fail_fn);
1313               PrepareDirIfNotPresent(volDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1314                                      fail_fn);
1315 
1316               actualCePath = volCeUserPath;
1317               actualDePath = volDeUserPath;
1318           } else {
1319               // Internal volume that stored in /data
1320               char internalCeUserPath[PATH_MAX];
1321               char internalDeUserPath[PATH_MAX];
1322               snprintf(internalCeUserPath, PATH_MAX, "/data/user/%d", userId);
1323               snprintf(internalDeUserPath, PATH_MAX, "/data/user_de/%d", userId);
1324               // If it's not user 0, create /data/user/$USER.
1325               if (userId == 0) {
1326                   actualCePath = internalLegacyCePath;
1327               } else {
1328                   PrepareDirIfNotPresent(internalCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1329                                          AID_ROOT, fail_fn);
1330                   actualCePath = internalCeUserPath;
1331               }
1332               PrepareDirIfNotPresent(internalDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1333                                      AID_ROOT, fail_fn);
1334               actualDePath = internalDeUserPath;
1335           }
1336           isolateAppDataPerPackage(userId, packageName, volUuid, ceDataInode, actualCePath,
1337                                    actualDePath, fail_fn);
1338       }
1339   }
1340 
1341   // We set the label AFTER everything is done, as we are applying
1342   // the file operations on tmpfs. If we set the label when we mount
1343   // tmpfs, SELinux will not happy as we are changing system_data_files.
1344   // Relabel dir under /data/user, including /data/user/0
1345   relabelAllDirs(internalCePath, dataDataContext, fail_fn);
1346 
1347   // Relabel /data/user
1348   relabelDir(internalCePath, dataDataContext, fail_fn);
1349 
1350   // Relabel /data/data
1351   relabelDir(internalLegacyCePath, dataDataContext, fail_fn);
1352 
1353   // Relabel dir under /data/user_de
1354   relabelAllDirs(internalDePath, dataDataContext, fail_fn);
1355 
1356   // Relabel /data/user_de
1357   relabelDir(internalDePath, dataDataContext, fail_fn);
1358 
1359   // Relabel CE and DE dirs under /mnt/expand
1360   dir = opendir(externalPrivateMountPath);
1361   if (dir == nullptr) {
1362     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1363   }
1364   while ((ent = readdir(dir))) {
1365     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1366     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1367     auto cePath = StringPrintf("%s/user", volPath.c_str());
1368     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1369 
1370     relabelAllDirs(cePath.c_str(), dataDataContext, fail_fn);
1371     relabelDir(cePath.c_str(), dataDataContext, fail_fn);
1372     relabelAllDirs(dePath.c_str(), dataDataContext, fail_fn);
1373     relabelDir(dePath.c_str(), dataDataContext, fail_fn);
1374   }
1375   closedir(dir);
1376 
1377   freecon(dataDataContext);
1378 }
1379 
1380 /**
1381  * Without sdk sandbox data isolation, the sandbox could detect if another app is installed on the
1382  * system by "touching" other data directories like /data/misc_ce/0/sdksandbox/com.whatsapp, similar
1383  * to apps without app data isolation (see {@link #isolateAppData()}).
1384  *
1385  * To prevent this, tmpfs is mounted onto misc_ce and misc_de directories on all possible volumes in
1386  * a separate mount namespace. The sandbox directory path is then created containing the name of the
1387  * client app package associated with the sdk sandbox. The contents for this (sdk level storage and
1388  * shared sdk storage) are bind mounted from the sandbox data mirror.
1389  */
isolateSdkSandboxData(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1390 static void isolateSdkSandboxData(JNIEnv* env, jobjectArray pkg_data_info_list, uid_t uid,
1391                                   const char* process_name, jstring managed_nice_name,
1392                                   fail_fn_t fail_fn) {
1393     const userid_t userId = multiuser_get_user_id(uid);
1394 
1395     int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1396     // The sandbox should only have information of one associated client app (package, uuid, inode)
1397     if (size != 3) {
1398         fail_fn(CREATE_ERROR(
1399                 "Unable to isolate sandbox data, incorrect associated app information"));
1400     }
1401 
1402     auto extract_fn = [env, process_name, managed_nice_name,
1403                        pkg_data_info_list](int info_list_idx) {
1404         jstring jstr = (jstring)(env->GetObjectArrayElement(pkg_data_info_list, info_list_idx));
1405         return ExtractJString(env, process_name, managed_nice_name, jstr).value();
1406     };
1407     std::string packageName = extract_fn(0);
1408     std::string volUuid = extract_fn(1);
1409 
1410     char internalCePath[PATH_MAX];
1411     char internalDePath[PATH_MAX];
1412     char externalPrivateMountPath[PATH_MAX];
1413     snprintf(internalCePath, PATH_MAX, "/data/misc_ce");
1414     snprintf(internalDePath, PATH_MAX, "/data/misc_de");
1415     snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1416 
1417     char ceUserPath[PATH_MAX];
1418     char deUserPath[PATH_MAX];
1419     if (volUuid != "null") {
1420         snprintf(ceUserPath, PATH_MAX, "%s/%s/misc_ce/%d", externalPrivateMountPath,
1421                  volUuid.c_str(), userId);
1422         snprintf(deUserPath, PATH_MAX, "%s/%s/misc_de/%d", externalPrivateMountPath,
1423                  volUuid.c_str(), userId);
1424     } else {
1425         snprintf(ceUserPath, PATH_MAX, "%s/%d", internalCePath, userId);
1426         snprintf(deUserPath, PATH_MAX, "%s/%d", internalDePath, userId);
1427     }
1428 
1429     char ceSandboxPath[PATH_MAX];
1430     char deSandboxPath[PATH_MAX];
1431     snprintf(ceSandboxPath, PATH_MAX, "%s/sdksandbox", ceUserPath);
1432     snprintf(deSandboxPath, PATH_MAX, "%s/sdksandbox", deUserPath);
1433 
1434     // If the client app using the sandbox has been installed when the device is locked and the
1435     // sandbox starts up when the device is locked, sandbox storage might not have been created.
1436     // In that case, mount tmpfs for data isolation, but don't bind mount.
1437     bool bindMountCeSandboxDataDirs = true;
1438     bool bindMountDeSandboxDataDirs = true;
1439     if (access(ceSandboxPath, F_OK) != 0) {
1440         bindMountCeSandboxDataDirs = false;
1441     }
1442     if (access(deSandboxPath, F_OK) != 0) {
1443         bindMountDeSandboxDataDirs = false;
1444     }
1445 
1446     char* context = nullptr;
1447     char* userContext = nullptr;
1448     char* sandboxContext = nullptr;
1449     if (getfilecon(internalDePath, &context) < 0) {
1450         fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalDePath, strerror(errno)));
1451     }
1452     if (bindMountDeSandboxDataDirs) {
1453         if (getfilecon(deUserPath, &userContext) < 0) {
1454             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deUserPath, strerror(errno)));
1455         }
1456         if (getfilecon(deSandboxPath, &sandboxContext) < 0) {
1457             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deSandboxPath, strerror(errno)));
1458         }
1459     }
1460 
1461     MountAppDataTmpFs(internalCePath, fail_fn);
1462     MountAppDataTmpFs(internalDePath, fail_fn);
1463 
1464     // Mount tmpfs on all external volumes
1465     DIR* dir = opendir(externalPrivateMountPath);
1466     if (dir == nullptr) {
1467         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1468     }
1469     struct dirent* ent;
1470     while ((ent = readdir(dir))) {
1471         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1472         if (ent->d_type != DT_DIR) {
1473             fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1474         }
1475         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1476         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1477         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1478 
1479         WaitUntilDirReady(externalCePath.c_str(), fail_fn);
1480         MountAppDataTmpFs(externalCePath.c_str(), fail_fn);
1481         WaitUntilDirReady(externalDePath.c_str(), fail_fn);
1482         MountAppDataTmpFs(externalDePath.c_str(), fail_fn);
1483     }
1484     closedir(dir);
1485 
1486     char mirrorCeSandboxPath[PATH_MAX];
1487     char mirrorDeSandboxPath[PATH_MAX];
1488     snprintf(mirrorCeSandboxPath, PATH_MAX, "/data_mirror/misc_ce/%s/%d/sdksandbox",
1489              volUuid.c_str(), userId);
1490     snprintf(mirrorDeSandboxPath, PATH_MAX, "/data_mirror/misc_de/%s/%d/sdksandbox",
1491              volUuid.c_str(), userId);
1492 
1493     if (bindMountCeSandboxDataDirs) {
1494         PrepareDir(ceUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1495         PrepareDir(ceSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1496         // TODO(b/231322885): Use inode numbers to find the correct app path when the device locked.
1497         createAndMountAppData(packageName, packageName, mirrorCeSandboxPath, ceSandboxPath, fail_fn,
1498                               true /*call_fail_fn*/);
1499 
1500         relabelDir(ceSandboxPath, sandboxContext, fail_fn);
1501         relabelDir(ceUserPath, userContext, fail_fn);
1502     }
1503     if (bindMountDeSandboxDataDirs) {
1504         PrepareDir(deUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1505         PrepareDir(deSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1506         createAndMountAppData(packageName, packageName, mirrorDeSandboxPath, deSandboxPath, fail_fn,
1507                               true /*call_fail_fn*/);
1508 
1509         relabelDir(deSandboxPath, sandboxContext, fail_fn);
1510         relabelDir(deUserPath, userContext, fail_fn);
1511     }
1512 
1513     // We set the label AFTER everything is done, as we are applying
1514     // the file operations on tmpfs. If we set the label when we mount
1515     // tmpfs, SELinux will not happy as we are changing system_data_files.
1516     relabelDir(internalCePath, context, fail_fn);
1517     relabelDir(internalDePath, context, fail_fn);
1518 
1519     // Relabel CE and DE dirs under /mnt/expand
1520     dir = opendir(externalPrivateMountPath);
1521     if (dir == nullptr) {
1522         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1523     }
1524     while ((ent = readdir(dir))) {
1525         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1526         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1527         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1528         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1529         relabelDir(externalCePath.c_str(), context, fail_fn);
1530         relabelDir(externalDePath.c_str(), context, fail_fn);
1531     }
1532     closedir(dir);
1533 
1534     if (bindMountDeSandboxDataDirs) {
1535         freecon(sandboxContext);
1536         freecon(userContext);
1537     }
1538     freecon(context);
1539 }
1540 
insertPackagesToMergedList(JNIEnv * env,std::vector<std::string> & merged_data_info_list,jobjectArray data_info_list,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1541 static void insertPackagesToMergedList(JNIEnv* env,
1542   std::vector<std::string>& merged_data_info_list,
1543   jobjectArray data_info_list, const char* process_name,
1544   jstring managed_nice_name, fail_fn_t fail_fn) {
1545 
1546   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1547 
1548   int size = (data_info_list != nullptr) ? env->GetArrayLength(data_info_list) : 0;
1549   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1550   if ((size % 3) != 0) {
1551     fail_fn(CREATE_ERROR("Wrong data_info_list size %d", size));
1552   }
1553 
1554   for (int i = 0; i < size; i += 3) {
1555     jstring package_str = (jstring) (env->GetObjectArrayElement(data_info_list, i));
1556     std::string packageName = extract_fn(package_str).value();
1557     merged_data_info_list.push_back(packageName);
1558 
1559     jstring vol_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 1));
1560     std::string volUuid = extract_fn(vol_str).value();
1561     merged_data_info_list.push_back(volUuid);
1562 
1563     jstring inode_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 2));
1564     std::string inode = extract_fn(inode_str).value();
1565     merged_data_info_list.push_back(inode);
1566   }
1567 }
1568 
isolateAppData(JNIEnv * env,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1569 static void isolateAppData(JNIEnv* env, jobjectArray pkg_data_info_list,
1570                            jobjectArray allowlisted_data_info_list, uid_t uid,
1571                            const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1572     std::vector<std::string> merged_data_info_list;
1573     insertPackagesToMergedList(env, merged_data_info_list, pkg_data_info_list, process_name,
1574                                managed_nice_name, fail_fn);
1575     insertPackagesToMergedList(env, merged_data_info_list, allowlisted_data_info_list, process_name,
1576                                managed_nice_name, fail_fn);
1577 
1578     isolateAppData(env, merged_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1579 }
1580 
1581 /**
1582  * Like isolateAppData(), isolate jit profile directories, so apps don't see what
1583  * other apps are installed by reading content inside /data/misc/profiles/cur.
1584  *
1585  * The implementation is similar to isolateAppData(), it creates a tmpfs
1586  * on /data/misc/profiles/cur, and bind mounts related package profiles to it.
1587  */
isolateJitProfile(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1588 static void isolateJitProfile(JNIEnv* env, jobjectArray pkg_data_info_list,
1589     uid_t uid, const char* process_name, jstring managed_nice_name,
1590     fail_fn_t fail_fn) {
1591 
1592   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1593   const userid_t user_id = multiuser_get_user_id(uid);
1594 
1595   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1596   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1597   if ((size % 3) != 0) {
1598     fail_fn(CREATE_ERROR("Wrong pkg_inode_list size %d", size));
1599   }
1600 
1601   // Mount (namespace) tmpfs on profile directory, so apps no longer access
1602   // the original profile directory anymore.
1603   MountAppDataTmpFs(kCurProfileDirPath, fail_fn);
1604   MountAppDataTmpFs(kRefProfileDirPath, fail_fn);
1605 
1606   // Sandbox processes do not have JIT profile, so no data needs to be bind mounted. However, it
1607   // should still not have access to JIT profile, so tmpfs is mounted.
1608   if (is_sdk_sandbox_uid(uid)) {
1609       return;
1610   }
1611 
1612   // Create profile directory for this user.
1613   std::string actualCurUserProfile = StringPrintf("%s/%d", kCurProfileDirPath, user_id);
1614   PrepareDir(actualCurUserProfile, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1615 
1616   for (int i = 0; i < size; i += 3) {
1617     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1618     std::string packageName = extract_fn(package_str).value();
1619 
1620     std::string actualCurPackageProfile = StringPrintf("%s/%s", actualCurUserProfile.c_str(),
1621         packageName.c_str());
1622     std::string mirrorCurPackageProfile = StringPrintf("/data_mirror/cur_profiles/%d/%s",
1623         user_id, packageName.c_str());
1624     std::string actualRefPackageProfile = StringPrintf("%s/%s", kRefProfileDirPath,
1625         packageName.c_str());
1626     std::string mirrorRefPackageProfile = StringPrintf("/data_mirror/ref_profiles/%s",
1627         packageName.c_str());
1628 
1629     if (access(mirrorCurPackageProfile.c_str(), F_OK) != 0) {
1630       ALOGW("Can't access app profile directory: %s", mirrorCurPackageProfile.c_str());
1631       continue;
1632     }
1633     if (access(mirrorRefPackageProfile.c_str(), F_OK) != 0) {
1634       ALOGW("Can't access app profile directory: %s", mirrorRefPackageProfile.c_str());
1635       continue;
1636     }
1637 
1638     PrepareDir(actualCurPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1639     BindMount(mirrorCurPackageProfile, actualCurPackageProfile, fail_fn);
1640     PrepareDir(actualRefPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1641     BindMount(mirrorRefPackageProfile, actualRefPackageProfile, fail_fn);
1642   }
1643 }
1644 
WaitUntilDirReady(const std::string & target,fail_fn_t fail_fn)1645 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn) {
1646   unsigned int sleepIntervalUs = STORAGE_DIR_CHECK_INIT_INTERVAL_US;
1647 
1648   // This is just an approximate value as it doesn't need to be very accurate.
1649   unsigned int sleepTotalUs = 0;
1650 
1651   const char* dir_path = target.c_str();
1652   while (sleepTotalUs < STORAGE_DIR_CHECK_TIMEOUT_US) {
1653     if (access(dir_path, F_OK) == 0) {
1654       return;
1655     }
1656     // Failed, so we add exponential backoff and retry
1657     usleep(sleepIntervalUs);
1658     sleepTotalUs += sleepIntervalUs;
1659     sleepIntervalUs = std::min<unsigned int>(
1660         sleepIntervalUs * STORAGE_DIR_CHECK_RETRY_MULTIPLIER,
1661         STORAGE_DIR_CHECK_MAX_INTERVAL_US);
1662   }
1663   // Last chance and get the latest errno if it fails.
1664   if (access(dir_path, F_OK) == 0) {
1665     return;
1666   }
1667   fail_fn(CREATE_ERROR("Error dir is not ready %s: %s", dir_path, strerror(errno)));
1668 }
1669 
BindMountStorageToLowerFs(const userid_t user_id,const uid_t uid,const char * dir_name,const char * package,fail_fn_t fail_fn)1670 static void BindMountStorageToLowerFs(const userid_t user_id, const uid_t uid,
1671     const char* dir_name, const char* package, fail_fn_t fail_fn) {
1672     bool hasSdcardFs = IsSdcardfsUsed();
1673     std::string source;
1674     if (hasSdcardFs) {
1675         source = StringPrintf("/mnt/runtime/default/emulated/%d/%s/%s", user_id, dir_name, package);
1676     } else {
1677         source = StringPrintf("/mnt/pass_through/%d/emulated/%d/%s/%s", user_id, user_id, dir_name,
1678                               package);
1679     }
1680 
1681   // Directory might be not ready, as prepareStorageDirs() is running asynchronously in ProcessList,
1682   // so wait until dir is created.
1683   WaitUntilDirReady(source, fail_fn);
1684   std::string target = StringPrintf("/storage/emulated/%d/%s/%s", user_id, dir_name, package);
1685 
1686   // As the parent is mounted as tmpfs, we need to create the target dir here.
1687   PrepareDirIfNotPresent(target, 0700, uid, uid, fail_fn);
1688 
1689   if (access(source.c_str(), F_OK) != 0) {
1690     fail_fn(CREATE_ERROR("Error accessing %s: %s", source.c_str(), strerror(errno)));
1691   }
1692   if (access(target.c_str(), F_OK) != 0) {
1693     fail_fn(CREATE_ERROR("Error accessing %s: %s", target.c_str(), strerror(errno)));
1694   }
1695   BindMount(source, target, fail_fn);
1696 }
1697 
1698 // Mount tmpfs on Android/data and Android/obb, then bind mount all app visible package
1699 // directories in data and obb directories.
BindMountStorageDirs(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1700 static void BindMountStorageDirs(JNIEnv* env, jobjectArray pkg_data_info_list,
1701     uid_t uid, const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1702 
1703   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1704   const userid_t user_id = multiuser_get_user_id(uid);
1705 
1706   // Fuse is ready, so we can start using fuse path.
1707   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1708 
1709   // Create tmpfs on Android/obb and Android/data so these 2 dirs won't enter fuse anymore.
1710   std::string androidObbDir = StringPrintf("/storage/emulated/%d/Android/obb", user_id);
1711   MountAppDataTmpFs(androidObbDir, fail_fn);
1712   std::string androidDataDir = StringPrintf("/storage/emulated/%d/Android/data", user_id);
1713   MountAppDataTmpFs(androidDataDir, fail_fn);
1714 
1715   // Bind mount each package obb directory
1716   for (int i = 0; i < size; i += 3) {
1717     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1718     std::string packageName = extract_fn(package_str).value();
1719     BindMountStorageToLowerFs(user_id, uid, "Android/obb", packageName.c_str(), fail_fn);
1720     BindMountStorageToLowerFs(user_id, uid, "Android/data", packageName.c_str(), fail_fn);
1721   }
1722 }
1723 
1724 // Utility routine to specialize a zygote child process.
SpecializeCommon(JNIEnv * env,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities,jint mount_external,jstring managed_se_info,jstring managed_nice_name,bool is_system_server,bool is_child_zygote,jstring managed_instruction_set,jstring managed_app_data_dir,bool is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,bool mount_data_dirs,bool mount_storage_dirs)1725 static void SpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray gids, jint runtime_flags,
1726                              jobjectArray rlimits, jlong permitted_capabilities,
1727                              jlong effective_capabilities, jint mount_external,
1728                              jstring managed_se_info, jstring managed_nice_name,
1729                              bool is_system_server, bool is_child_zygote,
1730                              jstring managed_instruction_set, jstring managed_app_data_dir,
1731                              bool is_top_app, jobjectArray pkg_data_info_list,
1732                              jobjectArray allowlisted_data_info_list, bool mount_data_dirs,
1733                              bool mount_storage_dirs) {
1734     const char* process_name = is_system_server ? "system_server" : "zygote";
1735     auto fail_fn = std::bind(ZygoteFailure, env, process_name, managed_nice_name, _1);
1736     auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1737 
1738     auto se_info = extract_fn(managed_se_info);
1739     auto nice_name = extract_fn(managed_nice_name);
1740     auto instruction_set = extract_fn(managed_instruction_set);
1741     auto app_data_dir = extract_fn(managed_app_data_dir);
1742 
1743     // Keep capabilities across UID change, unless we're staying root.
1744     if (uid != 0) {
1745         EnableKeepCapabilities(fail_fn);
1746     }
1747 
1748     SetInheritable(permitted_capabilities, fail_fn);
1749 
1750     DropCapabilitiesBoundingSet(fail_fn);
1751 
1752     bool need_pre_initialize_native_bridge = !is_system_server && instruction_set.has_value() &&
1753             android::NativeBridgeAvailable() &&
1754             // Native bridge may be already initialized if this
1755             // is an app forked from app-zygote.
1756             !android::NativeBridgeInitialized() &&
1757             android::NeedsNativeBridge(instruction_set.value().c_str());
1758 
1759     MountEmulatedStorage(uid, mount_external, need_pre_initialize_native_bridge, fail_fn);
1760 
1761     // Make sure app is running in its own mount namespace before isolating its data directories.
1762     ensureInAppMountNamespace(fail_fn);
1763 
1764     // Isolate app data, jit profile and sandbox data directories by overlaying a tmpfs on those
1765     // dirs and bind mount all related packages separately.
1766     if (mount_data_dirs) {
1767         // Sdk sandbox data isolation does not need to occur for app processes since sepolicy
1768         // prevents access to sandbox data anyway.
1769         if (is_sdk_sandbox_uid(uid)) {
1770             isolateSdkSandboxData(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1771                                   fail_fn);
1772         }
1773         isolateAppData(env, pkg_data_info_list, allowlisted_data_info_list, uid, process_name,
1774                        managed_nice_name, fail_fn);
1775         isolateJitProfile(env, pkg_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1776     }
1777     // MOUNT_EXTERNAL_INSTALLER, MOUNT_EXTERNAL_PASS_THROUGH, MOUNT_EXTERNAL_ANDROID_WRITABLE apps
1778     // will have mount_storage_dirs == false here (set by ProcessList.needsStorageDataIsolation()),
1779     // and hence they won't bind mount storage dirs.
1780     if (mount_storage_dirs) {
1781         BindMountStorageDirs(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1782                              fail_fn);
1783     }
1784 
1785     // If this zygote isn't root, it won't be able to create a process group,
1786     // since the directory is owned by root.
1787     if (!is_system_server && getuid() == 0) {
1788         const int rc = createProcessGroup(uid, getpid());
1789         if (rc == -EROFS) {
1790             ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
1791         } else if (rc != 0) {
1792             ALOGE("createProcessGroup(%d, %d) failed: %s", uid, /* pid= */ 0, strerror(-rc));
1793         }
1794     }
1795 
1796     SetGids(env, gids, is_child_zygote, fail_fn);
1797     SetRLimits(env, rlimits, fail_fn);
1798 
1799     if (need_pre_initialize_native_bridge) {
1800         // Due to the logic behind need_pre_initialize_native_bridge we know that
1801         // instruction_set contains a value.
1802         android::PreInitializeNativeBridge(app_data_dir.has_value() ? app_data_dir.value().c_str()
1803                                                                     : nullptr,
1804                                            instruction_set.value().c_str());
1805     }
1806 
1807     if (is_system_server) {
1808         // Prefetch the classloader for the system server. This is done early to
1809         // allow a tie-down of the proper system server selinux domain.
1810         env->CallStaticObjectMethod(gZygoteInitClass, gGetOrCreateSystemServerClassLoader);
1811         if (env->ExceptionCheck()) {
1812             // Be robust here. The Java code will attempt to create the classloader
1813             // at a later point (but may not have rights to use AoT artifacts).
1814             env->ExceptionClear();
1815         }
1816         // Also prefetch standalone system server jars. The reason for doing this here is the same
1817         // as above.
1818         env->CallStaticVoidMethod(gZygoteInitClass, gPrefetchStandaloneSystemServerJars);
1819         if (env->ExceptionCheck()) {
1820             env->ExceptionClear();
1821         }
1822     }
1823 
1824     if (setresgid(gid, gid, gid) == -1) {
1825         fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
1826     }
1827 
1828     // Must be called when the new process still has CAP_SYS_ADMIN, in this case,
1829     // before changing uid from 0, which clears capabilities.  The other
1830     // alternative is to call prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that
1831     // breaks SELinux domain transition (see b/71859146).  As the result,
1832     // privileged syscalls used below still need to be accessible in app process.
1833     SetUpSeccompFilter(uid, is_child_zygote);
1834 
1835     // Must be called before losing the permission to set scheduler policy.
1836     SetSchedulerPolicy(fail_fn, is_top_app);
1837 
1838     if (setresuid(uid, uid, uid) == -1) {
1839         fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
1840     }
1841 
1842     // The "dumpable" flag of a process, which controls core dump generation, is
1843     // overwritten by the value in /proc/sys/fs/suid_dumpable when the effective
1844     // user or group ID changes. See proc(5) for possible values. In most cases,
1845     // the value is 0, so core dumps are disabled for zygote children. However,
1846     // when running in a Chrome OS container, the value is already set to 2,
1847     // which allows the external crash reporter to collect all core dumps. Since
1848     // only system crashes are interested, core dump is disabled for app
1849     // processes. This also ensures compliance with CTS.
1850     int dumpable = prctl(PR_GET_DUMPABLE);
1851     if (dumpable == -1) {
1852         ALOGE("prctl(PR_GET_DUMPABLE) failed: %s", strerror(errno));
1853         RuntimeAbort(env, __LINE__, "prctl(PR_GET_DUMPABLE) failed");
1854     }
1855 
1856     if (dumpable == 2 && uid >= AID_APP) {
1857         if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) == -1) {
1858             ALOGE("prctl(PR_SET_DUMPABLE, 0) failed: %s", strerror(errno));
1859             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 0) failed");
1860         }
1861     }
1862 
1863     // Set process properties to enable debugging if required.
1864     if ((runtime_flags & RuntimeFlags::DEBUG_ENABLE_JDWP) != 0) {
1865         EnableDebugger();
1866     }
1867     if ((runtime_flags & RuntimeFlags::PROFILE_FROM_SHELL) != 0) {
1868         // simpleperf needs the process to be dumpable to profile it.
1869         if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
1870             ALOGE("prctl(PR_SET_DUMPABLE) failed: %s", strerror(errno));
1871             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 1) failed");
1872         }
1873     }
1874 
1875     HeapTaggingLevel heap_tagging_level;
1876     switch (runtime_flags & RuntimeFlags::MEMORY_TAG_LEVEL_MASK) {
1877         case RuntimeFlags::MEMORY_TAG_LEVEL_TBI:
1878             heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
1879             break;
1880         case RuntimeFlags::MEMORY_TAG_LEVEL_ASYNC:
1881             heap_tagging_level = M_HEAP_TAGGING_LEVEL_ASYNC;
1882             break;
1883         case RuntimeFlags::MEMORY_TAG_LEVEL_SYNC:
1884             heap_tagging_level = M_HEAP_TAGGING_LEVEL_SYNC;
1885             break;
1886         default:
1887             heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
1888             break;
1889     }
1890     mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL, heap_tagging_level);
1891 
1892     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1893     // runtime.
1894     runtime_flags &= ~RuntimeFlags::MEMORY_TAG_LEVEL_MASK;
1895 
1896     // Avoid heap zero initialization for applications without MTE. Zero init may
1897     // cause app compat problems, use more memory, or reduce performance. While it
1898     // would be nice to have them for apps, we will have to wait until they are
1899     // proven out, have more efficient hardware, and/or apply them only to new
1900     // applications.
1901     if (!(runtime_flags & RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED)) {
1902         mallopt(M_BIONIC_ZERO_INIT, 0);
1903     }
1904 
1905     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1906     // runtime.
1907     runtime_flags &= ~RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED;
1908 
1909     const char* nice_name_ptr = nice_name.has_value() ? nice_name.value().c_str() : nullptr;
1910     android_mallopt_gwp_asan_options_t gwp_asan_options;
1911     // The system server doesn't have its nice name set by the time SpecializeCommon is called.
1912     gwp_asan_options.program_name = nice_name_ptr ?: process_name;
1913     switch (runtime_flags & RuntimeFlags::GWP_ASAN_LEVEL_MASK) {
1914         default:
1915         case RuntimeFlags::GWP_ASAN_LEVEL_NEVER:
1916             gwp_asan_options.desire = Action::DONT_TURN_ON_UNLESS_OVERRIDDEN;
1917             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
1918             break;
1919         case RuntimeFlags::GWP_ASAN_LEVEL_ALWAYS:
1920             gwp_asan_options.desire = Action::TURN_ON_FOR_APP;
1921             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
1922             break;
1923         case RuntimeFlags::GWP_ASAN_LEVEL_LOTTERY:
1924             gwp_asan_options.desire = Action::TURN_ON_WITH_SAMPLING;
1925             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
1926             break;
1927     }
1928     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
1929     // runtime.
1930     runtime_flags &= ~RuntimeFlags::GWP_ASAN_LEVEL_MASK;
1931 
1932     SetCapabilities(permitted_capabilities, effective_capabilities, permitted_capabilities,
1933                     fail_fn);
1934 
1935     __android_log_close();
1936     AStatsSocket_close();
1937 
1938     const char* se_info_ptr = se_info.has_value() ? se_info.value().c_str() : nullptr;
1939 
1940     if (selinux_android_setcontext(uid, is_system_server, se_info_ptr, nice_name_ptr) == -1) {
1941         fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
1942                              is_system_server, se_info_ptr, nice_name_ptr));
1943     }
1944 
1945     // Make it easier to debug audit logs by setting the main thread's name to the
1946     // nice name rather than "app_process".
1947     if (nice_name.has_value()) {
1948         SetThreadName(nice_name.value());
1949     } else if (is_system_server) {
1950         SetThreadName("system_server");
1951     }
1952 
1953     // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
1954     UnsetChldSignalHandler();
1955 
1956     if (is_system_server) {
1957         env->CallStaticVoidMethod(gZygoteClass, gCallPostForkSystemServerHooks, runtime_flags);
1958         if (env->ExceptionCheck()) {
1959             fail_fn("Error calling post fork system server hooks.");
1960         }
1961 
1962         // TODO(b/117874058): Remove hardcoded label here.
1963         static const char* kSystemServerLabel = "u:r:system_server:s0";
1964         if (selinux_android_setcon(kSystemServerLabel) != 0) {
1965             fail_fn(CREATE_ERROR("selinux_android_setcon(%s)", kSystemServerLabel));
1966         }
1967     }
1968 
1969     if (is_child_zygote) {
1970         initUnsolSocketToSystemServer();
1971     }
1972 
1973     env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
1974                               is_system_server, is_child_zygote, managed_instruction_set);
1975 
1976     // Reset the process priority to the default value.
1977     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
1978 
1979     if (env->ExceptionCheck()) {
1980         fail_fn("Error calling post fork hooks.");
1981     }
1982 }
1983 
GetEffectiveCapabilityMask(JNIEnv * env)1984 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
1985     __user_cap_header_struct capheader;
1986     memset(&capheader, 0, sizeof(capheader));
1987     capheader.version = _LINUX_CAPABILITY_VERSION_3;
1988     capheader.pid = 0;
1989 
1990     __user_cap_data_struct capdata[2];
1991     if (capget(&capheader, &capdata[0]) == -1) {
1992         ALOGE("capget failed: %s", strerror(errno));
1993         RuntimeAbort(env, __LINE__, "capget failed");
1994     }
1995 
1996     return capdata[0].effective | (static_cast<uint64_t>(capdata[1].effective) << 32);
1997 }
1998 
CalculateCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids,bool is_child_zygote)1999 static jlong CalculateCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids,
2000                                    bool is_child_zygote) {
2001   jlong capabilities = 0;
2002 
2003   /*
2004    *  Grant the following capabilities to the Bluetooth user:
2005    *    - CAP_WAKE_ALARM
2006    *    - CAP_NET_ADMIN
2007    *    - CAP_NET_RAW
2008    *    - CAP_NET_BIND_SERVICE (for DHCP client functionality)
2009    *    - CAP_SYS_NICE (for setting RT priority for audio-related threads)
2010    */
2011 
2012   if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
2013     capabilities |= (1LL << CAP_WAKE_ALARM);
2014     capabilities |= (1LL << CAP_NET_ADMIN);
2015     capabilities |= (1LL << CAP_NET_RAW);
2016     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2017     capabilities |= (1LL << CAP_SYS_NICE);
2018   }
2019 
2020   if (multiuser_get_app_id(uid) == AID_NETWORK_STACK) {
2021     capabilities |= (1LL << CAP_NET_ADMIN);
2022     capabilities |= (1LL << CAP_NET_BROADCAST);
2023     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2024     capabilities |= (1LL << CAP_NET_RAW);
2025   }
2026 
2027   /*
2028    * Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
2029    */
2030 
2031   bool gid_wakelock_found = false;
2032   if (gid == AID_WAKELOCK) {
2033     gid_wakelock_found = true;
2034   } else if (gids != nullptr) {
2035     jsize gids_num = env->GetArrayLength(gids);
2036     ScopedIntArrayRO native_gid_proxy(env, gids);
2037 
2038     if (native_gid_proxy.get() == nullptr) {
2039       RuntimeAbort(env, __LINE__, "Bad gids array");
2040     }
2041 
2042     for (int gids_index = 0; gids_index < gids_num; ++gids_index) {
2043       if (native_gid_proxy[gids_index] == AID_WAKELOCK) {
2044         gid_wakelock_found = true;
2045         break;
2046       }
2047     }
2048   }
2049 
2050   if (gid_wakelock_found) {
2051     capabilities |= (1LL << CAP_BLOCK_SUSPEND);
2052   }
2053 
2054   /*
2055    * Grant child Zygote processes the following capabilities:
2056    *   - CAP_SETUID (change UID of child processes)
2057    *   - CAP_SETGID (change GID of child processes)
2058    *   - CAP_SETPCAP (change capabilities of child processes)
2059    */
2060 
2061   if (is_child_zygote) {
2062     capabilities |= (1LL << CAP_SETUID);
2063     capabilities |= (1LL << CAP_SETGID);
2064     capabilities |= (1LL << CAP_SETPCAP);
2065   }
2066 
2067   /*
2068    * Containers run without some capabilities, so drop any caps that are not
2069    * available.
2070    */
2071 
2072   return capabilities & GetEffectiveCapabilityMask(env);
2073 }
2074 
2075 /**
2076  * Adds the given information about a newly created unspecialized app
2077  * processes to the Zygote's USAP table.
2078  *
2079  * @param usap_pid  Process ID of the newly created USAP
2080  * @param read_pipe_fd  File descriptor for the read end of the USAP
2081  * reporting pipe.  Used in the ZygoteServer poll loop to track USAP
2082  * specialization.
2083  */
AddUsapTableEntry(pid_t usap_pid,int read_pipe_fd)2084 static void AddUsapTableEntry(pid_t usap_pid, int read_pipe_fd) {
2085   static int sUsapTableInsertIndex = 0;
2086 
2087   int search_index = sUsapTableInsertIndex;
2088   do {
2089     if (gUsapTable[search_index].SetIfInvalid(usap_pid, read_pipe_fd)) {
2090       ++gUsapPoolCount;
2091 
2092       // Start our next search right after where we finished this one.
2093       sUsapTableInsertIndex = (search_index + 1) % gUsapTable.size();
2094 
2095       return;
2096     }
2097 
2098     search_index = (search_index + 1) % gUsapTable.size();
2099   } while (search_index != sUsapTableInsertIndex);
2100 
2101   // Much like money in the banana stand, there should always be an entry
2102   // in the USAP table.
2103   __builtin_unreachable();
2104 }
2105 
2106 /**
2107  * Invalidates the entry in the USAPTable corresponding to the provided
2108  * process ID if it is present.  If an entry was removed the USAP pool
2109  * count is decremented. May be called from signal handler.
2110  *
2111  * @param usap_pid  Process ID of the USAP entry to invalidate
2112  * @return True if an entry was invalidated; false otherwise
2113  */
RemoveUsapTableEntry(pid_t usap_pid)2114 static bool RemoveUsapTableEntry(pid_t usap_pid) {
2115   for (UsapTableEntry& entry : gUsapTable) {
2116     if (entry.ClearForPID(usap_pid)) {
2117       --gUsapPoolCount;
2118       return true;
2119     }
2120   }
2121 
2122   return false;
2123 }
2124 
2125 /**
2126  * @return A vector of the read pipe FDs for each of the active USAPs.
2127  */
MakeUsapPipeReadFDVector()2128 std::vector<int> MakeUsapPipeReadFDVector() {
2129   std::vector<int> fd_vec;
2130   fd_vec.reserve(gUsapTable.size());
2131 
2132   for (UsapTableEntry& entry : gUsapTable) {
2133     auto entry_values = entry.GetValues();
2134 
2135     if (entry_values.has_value()) {
2136       fd_vec.push_back(entry_values.value().read_pipe_fd);
2137     }
2138   }
2139 
2140   return fd_vec;
2141 }
2142 
UnmountStorageOnInit(JNIEnv * env)2143 static void UnmountStorageOnInit(JNIEnv* env) {
2144   // Zygote process unmount root storage space initially before every child processes are forked.
2145   // Every forked child processes (include SystemServer) only mount their own root storage space
2146   // and no need unmount storage operation in MountEmulatedStorage method.
2147   // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
2148 
2149   // See storage config details at http://source.android.com/tech/storage/
2150   // Create private mount namespace shared by all children
2151   if (unshare(CLONE_NEWNS) == -1) {
2152     RuntimeAbort(env, __LINE__, "Failed to unshare()");
2153     return;
2154   }
2155 
2156   // Mark rootfs as being MS_SLAVE so that changes from default
2157   // namespace only flow into our children.
2158   if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
2159     RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
2160     return;
2161   }
2162 
2163   // Create a staging tmpfs that is shared by our children; they will
2164   // bind mount storage into their respective private namespaces, which
2165   // are isolated from each other.
2166   const char* target_base = getenv("EMULATED_STORAGE_TARGET");
2167   if (target_base != nullptr) {
2168 #define STRINGIFY_UID(x) __STRING(x)
2169     if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
2170               "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
2171       ALOGE("Failed to mount tmpfs to %s", target_base);
2172       RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
2173       return;
2174     }
2175 #undef STRINGIFY_UID
2176   }
2177 
2178   UnmountTree("/storage");
2179 }
2180 
2181 }  // anonymous namespace
2182 
2183 namespace android {
2184 
2185 /**
2186  * A failure function used to report fatal errors to the managed runtime.  This
2187  * function is often curried with the process name information and then passed
2188  * to called functions.
2189  *
2190  * @param env  Managed runtime environment
2191  * @param process_name  A native representation of the process name
2192  * @param managed_process_name  A managed representation of the process name
2193  * @param msg  The error message to be reported
2194  */
2195 [[noreturn]]
ZygoteFailure(JNIEnv * env,const char * process_name,jstring managed_process_name,const std::string & msg)2196 void zygote::ZygoteFailure(JNIEnv* env,
2197                            const char* process_name,
2198                            jstring managed_process_name,
2199                            const std::string& msg) {
2200   std::unique_ptr<ScopedUtfChars> scoped_managed_process_name_ptr = nullptr;
2201   if (managed_process_name != nullptr) {
2202     scoped_managed_process_name_ptr.reset(new ScopedUtfChars(env, managed_process_name));
2203     if (scoped_managed_process_name_ptr->c_str() != nullptr) {
2204       process_name = scoped_managed_process_name_ptr->c_str();
2205     }
2206   }
2207 
2208   const std::string& error_msg =
2209       (process_name == nullptr || process_name[0] == '\0') ?
2210       msg : StringPrintf("(%s) %s", process_name, msg.c_str());
2211 
2212   env->FatalError(error_msg.c_str());
2213   __builtin_unreachable();
2214 }
2215 
// Set of file descriptors that the fork paths below append to fds_to_ignore,
// but only once gPreloadFdsExtracted is true.  Null until it is created.
static std::set<int>* gPreloadFds = nullptr;
// Gates the use of gPreloadFds in the fork paths; starts out false.
static bool gPreloadFdsExtracted = false;
2218 
2219 // Utility routine to fork a process from the zygote.
ForkCommon(JNIEnv * env,bool is_system_server,const std::vector<int> & fds_to_close,const std::vector<int> & fds_to_ignore,bool is_priority_fork,bool purge)2220 pid_t zygote::ForkCommon(JNIEnv* env, bool is_system_server,
2221                          const std::vector<int>& fds_to_close,
2222                          const std::vector<int>& fds_to_ignore,
2223                          bool is_priority_fork,
2224                          bool purge) {
2225   SetSignalHandlers();
2226 
2227   // Curry a failure function.
2228   auto fail_fn = std::bind(zygote::ZygoteFailure, env,
2229                            is_system_server ? "system_server" : "zygote",
2230                            nullptr, _1);
2231 
2232   // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
2233   // log, which would result in the logging FDs we close being reopened.
2234   // This would cause failures because the FDs are not allowlisted.
2235   //
2236   // Note that the zygote process is single threaded at this point.
2237   BlockSignal(SIGCHLD, fail_fn);
2238 
2239   // Close any logging related FDs before we start evaluating the list of
2240   // file descriptors.
2241   __android_log_close();
2242   AStatsSocket_close();
2243 
2244   // If this is the first fork for this zygote, create the open FD table,
2245   // verifying that files are of supported type and allowlisted.  Otherwise (not
2246   // the first fork), check that the open files have not changed.  Newly open
2247   // files are not expected, and will be disallowed in the future.  Currently
2248   // they are allowed if they pass the same checks as in the
2249   // FileDescriptorTable::Create() above.
2250   if (gOpenFdTable == nullptr) {
2251     gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, fail_fn);
2252   } else {
2253     gOpenFdTable->Restat(fds_to_ignore, fail_fn);
2254   }
2255 
2256   android_fdsan_error_level fdsan_error_level = android_fdsan_get_error_level();
2257 
2258   if (purge) {
2259     // Purge unused native memory in an attempt to reduce the amount of false
2260     // sharing with the child process.  By reducing the size of the libc_malloc
2261     // region shared with the child process we reduce the number of pages that
2262     // transition to the private-dirty state when malloc adjusts the meta-data
2263     // on each of the pages it is managing after the fork.
2264     mallopt(M_PURGE, 0);
2265   }
2266 
2267   pid_t pid = fork();
2268 
2269   if (pid == 0) {
2270     if (is_priority_fork) {
2271       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2272     } else {
2273       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MIN);
2274     }
2275 
2276     // The child process.
2277     PreApplicationInit();
2278 
2279     // Clean up any descriptors which must be closed immediately
2280     DetachDescriptors(env, fds_to_close, fail_fn);
2281 
2282     // Invalidate the entries in the USAP table.
2283     ClearUsapTable();
2284 
2285     // Re-open all remaining open file descriptors so that they aren't shared
2286     // with the zygote across a fork.
2287     gOpenFdTable->ReopenOrDetach(fail_fn);
2288 
2289     // Turn fdsan back on.
2290     android_fdsan_set_error_level(fdsan_error_level);
2291 
2292     // Reset the fd to the unsolicited zygote socket
2293     gSystemServerSocketFd = -1;
2294   } else {
2295     ALOGD("Forked child process %d", pid);
2296   }
2297 
2298   // We blocked SIGCHLD prior to a fork, we unblock it here.
2299   UnblockSignal(SIGCHLD, fail_fn);
2300 
2301   return pid;
2302 }
2303 
com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv *,jclass)2304 static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
2305   PreApplicationInit();
2306 }
2307 
com_android_internal_os_Zygote_nativeForkAndSpecialize(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jintArray managed_fds_to_close,jintArray managed_fds_to_ignore,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs)2308 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
2309         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2310         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2311         jintArray managed_fds_to_close, jintArray managed_fds_to_ignore, jboolean is_child_zygote,
2312         jstring instruction_set, jstring app_data_dir, jboolean is_top_app,
2313         jobjectArray pkg_data_info_list, jobjectArray allowlisted_data_info_list,
2314         jboolean mount_data_dirs, jboolean mount_storage_dirs) {
2315     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2316 
2317     if (UNLIKELY(managed_fds_to_close == nullptr)) {
2318       zygote::ZygoteFailure(env, "zygote", nice_name,
2319                             "Zygote received a null fds_to_close vector.");
2320     }
2321 
2322     std::vector<int> fds_to_close =
2323         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_close).value();
2324     std::vector<int> fds_to_ignore =
2325         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_ignore)
2326             .value_or(std::vector<int>());
2327 
2328     std::vector<int> usap_pipes = MakeUsapPipeReadFDVector();
2329 
2330     fds_to_close.insert(fds_to_close.end(), usap_pipes.begin(), usap_pipes.end());
2331     fds_to_ignore.insert(fds_to_ignore.end(), usap_pipes.begin(), usap_pipes.end());
2332 
2333     fds_to_close.push_back(gUsapPoolSocketFD);
2334 
2335     if (gUsapPoolEventFD != -1) {
2336       fds_to_close.push_back(gUsapPoolEventFD);
2337       fds_to_ignore.push_back(gUsapPoolEventFD);
2338     }
2339 
2340     if (gSystemServerSocketFd != -1) {
2341         fds_to_close.push_back(gSystemServerSocketFd);
2342         fds_to_ignore.push_back(gSystemServerSocketFd);
2343     }
2344 
2345     if (gPreloadFds && gPreloadFdsExtracted) {
2346         fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2347     }
2348 
2349     pid_t pid = zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close, fds_to_ignore,
2350                                    true);
2351 
2352     if (pid == 0) {
2353         SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2354                          mount_external, se_info, nice_name, false, is_child_zygote == JNI_TRUE,
2355                          instruction_set, app_data_dir, is_top_app == JNI_TRUE, pkg_data_info_list,
2356                          allowlisted_data_info_list, mount_data_dirs == JNI_TRUE,
2357                          mount_storage_dirs == JNI_TRUE);
2358     }
2359     return pid;
2360 }
2361 
com_android_internal_os_Zygote_nativeForkSystemServer(JNIEnv * env,jclass,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities)2362 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
2363         JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
2364         jint runtime_flags, jobjectArray rlimits, jlong permitted_capabilities,
2365         jlong effective_capabilities) {
2366   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2367                    fds_to_ignore(fds_to_close);
2368 
2369   fds_to_close.push_back(gUsapPoolSocketFD);
2370 
2371   if (gUsapPoolEventFD != -1) {
2372     fds_to_close.push_back(gUsapPoolEventFD);
2373     fds_to_ignore.push_back(gUsapPoolEventFD);
2374   }
2375 
2376   if (gSystemServerSocketFd != -1) {
2377       fds_to_close.push_back(gSystemServerSocketFd);
2378       fds_to_ignore.push_back(gSystemServerSocketFd);
2379   }
2380 
2381   pid_t pid = zygote::ForkCommon(env, true,
2382                                  fds_to_close,
2383                                  fds_to_ignore,
2384                                  true);
2385   if (pid == 0) {
2386       // System server prcoess does not need data isolation so no need to
2387       // know pkg_data_info_list.
2388       SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, permitted_capabilities,
2389                        effective_capabilities, MOUNT_EXTERNAL_DEFAULT, nullptr, nullptr, true,
2390                        false, nullptr, nullptr, /* is_top_app= */ false,
2391                        /* pkg_data_info_list */ nullptr,
2392                        /* allowlisted_data_info_list */ nullptr, false, false);
2393   } else if (pid > 0) {
2394       // The zygote process checks whether the child process has died or not.
2395       ALOGI("System server process %d has been created", pid);
2396       gSystemServerPid = pid;
2397       // There is a slight window that the system server process has crashed
2398       // but it went unnoticed because we haven't published its pid yet. So
2399       // we recheck here just to make sure that all is well.
2400       int status;
2401       if (waitpid(pid, &status, WNOHANG) == pid) {
2402           ALOGE("System server process %d has died. Restarting Zygote!", pid);
2403           RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
2404       }
2405 
2406       if (UsePerAppMemcg()) {
2407           // Assign system_server to the correct memory cgroup.
2408           // Not all devices mount memcg so check if it is mounted first
2409           // to avoid unnecessarily printing errors and denials in the logs.
2410           if (!SetTaskProfiles(pid, std::vector<std::string>{"SystemMemoryProcess"})) {
2411               ALOGE("couldn't add process %d into system memcg group", pid);
2412           }
2413       }
2414   }
2415   return pid;
2416 }
2417 
2418 /**
2419  * A JNI function that forks an unspecialized app process from the Zygote while
2420  * ensuring proper file descriptor hygiene.
2421  *
2422  * @param env  Managed runtime environment
2423  * @param read_pipe_fd  The read FD for the USAP reporting pipe.  Manually closed by the child
2424  * in managed code. -1 indicates none.
2425  * @param write_pipe_fd  The write FD for the USAP reporting pipe.  Manually closed by the
2426  * zygote in managed code. -1 indicates none.
2427  * @param managed_session_socket_fds  A list of anonymous session sockets that must be ignored by
2428  * the FD hygiene code and automatically "closed" in the new USAP.
2429  * @param args_known Arguments for specialization are available; no need to read from a socket
2430  * @param is_priority_fork  Controls the nice level assigned to the newly created process
2431  * @return child pid in the parent, 0 in the child
2432  */
com_android_internal_os_Zygote_nativeForkApp(JNIEnv * env,jclass,jint read_pipe_fd,jint write_pipe_fd,jintArray managed_session_socket_fds,jboolean args_known,jboolean is_priority_fork)2433 static jint com_android_internal_os_Zygote_nativeForkApp(JNIEnv* env,
2434                                                          jclass,
2435                                                          jint read_pipe_fd,
2436                                                          jint write_pipe_fd,
2437                                                          jintArray managed_session_socket_fds,
2438                                                          jboolean args_known,
2439                                                          jboolean is_priority_fork) {
2440   std::vector<int> session_socket_fds =
2441       ExtractJIntArray(env, "USAP", nullptr, managed_session_socket_fds)
2442           .value_or(std::vector<int>());
2443   return zygote::forkApp(env, read_pipe_fd, write_pipe_fd, session_socket_fds,
2444                             args_known == JNI_TRUE, is_priority_fork == JNI_TRUE, true);
2445 }
2446 
forkApp(JNIEnv * env,int read_pipe_fd,int write_pipe_fd,const std::vector<int> & session_socket_fds,bool args_known,bool is_priority_fork,bool purge)2447 int zygote::forkApp(JNIEnv* env,
2448                     int read_pipe_fd,
2449                     int write_pipe_fd,
2450                     const std::vector<int>& session_socket_fds,
2451                     bool args_known,
2452                     bool is_priority_fork,
2453                     bool purge) {
2454 
2455   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2456                    fds_to_ignore(fds_to_close);
2457 
2458   fds_to_close.push_back(gZygoteSocketFD);
2459   if (gSystemServerSocketFd != -1) {
2460       fds_to_close.push_back(gSystemServerSocketFd);
2461   }
2462   if (args_known) {
2463       fds_to_close.push_back(gUsapPoolSocketFD);
2464   }
2465   fds_to_close.insert(fds_to_close.end(), session_socket_fds.begin(), session_socket_fds.end());
2466 
2467   fds_to_ignore.push_back(gUsapPoolSocketFD);
2468   fds_to_ignore.push_back(gZygoteSocketFD);
2469   if (read_pipe_fd != -1) {
2470       fds_to_ignore.push_back(read_pipe_fd);
2471   }
2472   if (write_pipe_fd != -1) {
2473       fds_to_ignore.push_back(write_pipe_fd);
2474   }
2475   fds_to_ignore.insert(fds_to_ignore.end(), session_socket_fds.begin(), session_socket_fds.end());
2476 
2477   if (gUsapPoolEventFD != -1) {
2478       fds_to_close.push_back(gUsapPoolEventFD);
2479       fds_to_ignore.push_back(gUsapPoolEventFD);
2480   }
2481   if (gSystemServerSocketFd != -1) {
2482       if (args_known) {
2483           fds_to_close.push_back(gSystemServerSocketFd);
2484       }
2485       fds_to_ignore.push_back(gSystemServerSocketFd);
2486   }
2487   if (gPreloadFds && gPreloadFdsExtracted) {
2488       fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2489   }
2490 
2491   return zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close,
2492                             fds_to_ignore, is_priority_fork == JNI_TRUE, purge);
2493 }
2494 
com_android_internal_os_Zygote_nativeAllowFileAcrossFork(JNIEnv * env,jclass,jstring path)2495 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
2496         JNIEnv* env, jclass, jstring path) {
2497     ScopedUtfChars path_native(env, path);
2498     const char* path_cstr = path_native.c_str();
2499     if (!path_cstr) {
2500         RuntimeAbort(env, __LINE__, "path_cstr == nullptr");
2501     }
2502     FileDescriptorAllowlist::Get()->Allow(path_cstr);
2503 }
2504 
com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(JNIEnv * env,jclass,jint uidGidMin,jint uidGidMax)2505 static void com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(
2506         JNIEnv* env, jclass, jint uidGidMin, jint uidGidMax) {
2507   if (!gIsSecurityEnforced) {
2508     ALOGI("seccomp disabled by setenforce 0");
2509     return;
2510   }
2511 
2512   bool installed = install_setuidgid_seccomp_filter(uidGidMin, uidGidMax);
2513   if (!installed) {
2514       RuntimeAbort(env, __LINE__, "Could not install setuid/setgid seccomp filter.");
2515   }
2516 }
2517 
2518 /**
2519  * Called from an unspecialized app process to specialize the process for a
2520  * given application.
2521  *
2522  * @param env  Managed runtime environment
2523  * @param uid  User ID of the new application
2524  * @param gid  Group ID of the new application
2525  * @param gids  Extra groups that the process belongs to
2526  * @param runtime_flags  Flags for changing the behavior of the managed runtime
2527  * @param rlimits  Resource limits
2528  * @param mount_external  The mode (read/write/normal) that external storage will be mounted with
2529  * @param se_info  SELinux policy information
2530  * @param nice_name  New name for this process
2531  * @param is_child_zygote  If the process is to become a WebViewZygote
2532  * @param instruction_set  The instruction set expected/requested by the new application
2533  * @param app_data_dir  Path to the application's data directory
2534  * @param is_top_app  If the process is for top (high priority) application
2535  */
com_android_internal_os_Zygote_nativeSpecializeAppProcess(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs)2536 static void com_android_internal_os_Zygote_nativeSpecializeAppProcess(
2537         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2538         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2539         jboolean is_child_zygote, jstring instruction_set, jstring app_data_dir,
2540         jboolean is_top_app, jobjectArray pkg_data_info_list,
2541         jobjectArray allowlisted_data_info_list, jboolean mount_data_dirs,
2542         jboolean mount_storage_dirs) {
2543     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2544 
2545     SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2546                      mount_external, se_info, nice_name, false, is_child_zygote == JNI_TRUE,
2547                      instruction_set, app_data_dir, is_top_app == JNI_TRUE, pkg_data_info_list,
2548                      allowlisted_data_info_list, mount_data_dirs == JNI_TRUE,
2549                      mount_storage_dirs == JNI_TRUE);
2550 }
2551 
2552 /**
2553  * A helper method for fetching socket file descriptors that were opened by init from the
2554  * environment.
2555  *
2556  * @param env  Managed runtime environment
2557  * @param is_primary  If this process is the primary or secondary Zygote; used to compute the name
2558  * of the environment variable storing the file descriptors.
2559  */
com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv * env,jclass,jboolean is_primary)2560 static void com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv* env, jclass,
2561                                                                  jboolean is_primary) {
2562   /*
2563    * Obtain file descriptors created by init from the environment.
2564    */
2565 
2566   gZygoteSocketFD =
2567       android_get_control_socket(is_primary ? "zygote" : "zygote_secondary");
2568   if (gZygoteSocketFD >= 0) {
2569     ALOGV("Zygote:zygoteSocketFD = %d", gZygoteSocketFD);
2570   } else {
2571     ALOGE("Unable to fetch Zygote socket file descriptor");
2572   }
2573 
2574   gUsapPoolSocketFD =
2575       android_get_control_socket(is_primary ? "usap_pool_primary" : "usap_pool_secondary");
2576   if (gUsapPoolSocketFD >= 0) {
2577     ALOGV("Zygote:usapPoolSocketFD = %d", gUsapPoolSocketFD);
2578   } else {
2579     ALOGE("Unable to fetch USAP pool socket file descriptor");
2580   }
2581 
2582   initUnsolSocketToSystemServer();
2583 
2584   /*
2585    * Security Initialization
2586    */
2587 
2588   // security_getenforce is not allowed on app process. Initialize and cache
2589   // the value before zygote forks.
2590   gIsSecurityEnforced = security_getenforce();
2591 
2592   selinux_android_seapp_context_init();
2593 
2594   /*
2595    * Storage Initialization
2596    */
2597 
2598   UnmountStorageOnInit(env);
2599 
2600   /*
2601    * Performance Initialization
2602    */
2603 
2604   if (!SetTaskProfiles(0, {})) {
2605     zygote::ZygoteFailure(env, "zygote", nullptr, "Zygote SetTaskProfiles failed");
2606   }
2607 }
2608 
2609 /**
2610  * @param env  Managed runtime environment
2611  * @return  A managed array of raw file descriptors for the read ends of the USAP reporting
2612  * pipes.
2613  */
com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv * env,jclass)2614 static jintArray com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv* env, jclass) {
2615   std::vector<int> usap_fds = MakeUsapPipeReadFDVector();
2616 
2617   jintArray managed_usap_fds = env->NewIntArray(usap_fds.size());
2618   env->SetIntArrayRegion(managed_usap_fds, 0, usap_fds.size(), usap_fds.data());
2619 
2620   return managed_usap_fds;
2621 }
2622 
2623 /*
2624  * Add the given pid and file descriptor to the Usap table. CriticalNative method.
2625  */
com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid,jint read_pipe_fd)2626 static void com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid, jint read_pipe_fd) {
2627   AddUsapTableEntry(pid, read_pipe_fd);
2628 }
2629 
2630 /**
2631  * A JNI wrapper around RemoveUsapTableEntry. CriticalNative method.
2632  *
2633  * @param env  Managed runtime environment
2634  * @param usap_pid  Process ID of the USAP entry to invalidate
2635  * @return  True if an entry was invalidated; false otherwise.
2636  */
com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid)2637 static jboolean com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid) {
2638   return RemoveUsapTableEntry(usap_pid);
2639 }
2640 
2641 /**
2642  * Creates the USAP pool event FD if it doesn't exist and returns it.  This is used by the
2643  * ZygoteServer poll loop to know when to re-fill the USAP pool.
2644  *
2645  * @param env  Managed runtime environment
2646  * @return A raw event file descriptor used to communicate (from the signal handler) when the
2647  * Zygote receives a SIGCHLD for a USAP
2648  */
com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv * env,jclass)2649 static jint com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv* env, jclass) {
2650   if (gUsapPoolEventFD == -1) {
2651     if ((gUsapPoolEventFD = eventfd(0, 0)) == -1) {
2652       zygote::ZygoteFailure(env, "zygote", nullptr,
2653                             StringPrintf("Unable to create eventfd: %s", strerror(errno)));
2654     }
2655   }
2656 
2657   return gUsapPoolEventFD;
2658 }
2659 
2660 /**
2661  * @param env  Managed runtime environment
2662  * @return The number of USAPs currently in the USAP pool
2663  */
com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv * env,jclass)2664 static jint com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv* env, jclass) {
2665   return gUsapPoolCount;
2666 }
2667 
2668 /**
2669  * Kills all processes currently in the USAP pool and closes their read pipe
2670  * FDs.
2671  *
2672  * @param env  Managed runtime environment
2673  */
com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv * env,jclass)2674 static void com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv* env, jclass) {
2675   for (auto& entry : gUsapTable) {
2676     auto entry_storage = entry.GetValues();
2677 
2678     if (entry_storage.has_value()) {
2679       kill(entry_storage.value().pid, SIGTERM);
2680 
2681       // Clean up the USAP table entry here.  This avoids a potential race
2682       // where a newly created USAP might not be able to find a valid table
2683       // entry if signal handler (which would normally do the cleanup) doesn't
2684       // run between now and when the new process is created.
2685 
2686       close(entry_storage.value().read_pipe_fd);
2687 
2688       // Avoid a second atomic load by invalidating instead of clearing.
2689       entry.Invalidate();
2690       --gUsapPoolCount;
2691     }
2692   }
2693 }
2694 
com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv * env,jclass)2695 static void com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv* env, jclass) {
2696   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2697   BlockSignal(SIGTERM, fail_fn);
2698 }
2699 
com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv * env,jclass)2700 static void com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv* env, jclass) {
2701   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2702   UnblockSignal(SIGTERM, fail_fn);
2703 }
2704 
com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv * env,jclass)2705 static void com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv* env, jclass) {
2706   setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2707 }
2708 
com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv * env,jclass,jbyteArray in,jint length,jintArray out)2709 static jint com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv* env, jclass, jbyteArray in,
2710                                                               jint length, jintArray out) {
2711     if (length != sizeof(struct UnsolicitedZygoteMessageSigChld)) {
2712         // Apparently it's not the message we are expecting.
2713         return -1;
2714     }
2715     if (in == nullptr || out == nullptr) {
2716         // Invalid parameter
2717         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2718         return -1;
2719     }
2720     ScopedByteArrayRO source(env, in);
2721     if (source.size() < length) {
2722         // Invalid parameter
2723         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2724         return -1;
2725     }
2726     const struct UnsolicitedZygoteMessageSigChld* msg =
2727             reinterpret_cast<const struct UnsolicitedZygoteMessageSigChld*>(source.get());
2728 
2729     switch (msg->header.type) {
2730         case UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD: {
2731             ScopedIntArrayRW buf(env, out);
2732             if (buf.size() != 3) {
2733                 jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2734                 return UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED;
2735             }
2736             buf[0] = msg->payload.pid;
2737             buf[1] = msg->payload.uid;
2738             buf[2] = msg->payload.status;
2739             return 3;
2740         }
2741         default:
2742             break;
2743     }
2744     return -1;
2745 }
2746 
com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv * env,jclass)2747 static jboolean com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv* env, jclass) {
2748 #if defined(__aarch64__)
2749   return mte_supported();
2750 #else
2751   return false;
2752 #endif
2753 }
2754 
com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv * env,jclass)2755 static jboolean com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv* env, jclass) {
2756 #ifdef __aarch64__
2757   int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2758   return res >= 0 && res & PR_TAGGED_ADDR_ENABLE;
2759 #else
2760   return false;
2761 #endif
2762 }
2763 
com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv * env,jclass)2764 static jint com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv* env, jclass) {
2765 #if defined(__aarch64__)
2766   int level = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2767   if (level < 0) {
2768     ALOGE("Failed to get memory tag level: %s", strerror(errno));
2769     return 0;
2770   } else if (!(level & PR_TAGGED_ADDR_ENABLE)) {
2771     return 0;
2772   }
2773   // TBI is only possible on non-MTE hardware.
2774   if (!mte_supported()) {
2775     return MEMORY_TAG_LEVEL_TBI;
2776   }
2777 
2778   switch (level & PR_MTE_TCF_MASK) {
2779     case PR_MTE_TCF_NONE:
2780       return 0;
2781     case PR_MTE_TCF_SYNC:
2782       return MEMORY_TAG_LEVEL_SYNC;
2783     case PR_MTE_TCF_ASYNC:
2784     case PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC:
2785       return MEMORY_TAG_LEVEL_ASYNC;
2786     default:
2787       ALOGE("Unknown memory tagging level: %i", level);
2788       return 0;
2789   }
2790 #else // defined(__aarch64__)
2791   return 0;
2792 #endif // defined(__aarch64__)
2793 }
2794 
com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv * env,jclass)2795 static void com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv* env, jclass) {
2796     // Ignore invocations when too early or too late.
2797     if (gPreloadFds) {
2798         return;
2799     }
2800 
2801     // App Zygote Preload starts soon. Save FDs remaining open.  After the
2802     // preload finishes newly open files will be determined.
2803     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
2804     gPreloadFds = GetOpenFds(fail_fn).release();
2805 }
2806 
com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv * env,jclass)2807 static void com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv* env, jclass) {
2808     // Ignore invocations when too early or too late.
2809     if (!gPreloadFds || gPreloadFdsExtracted) {
2810         return;
2811     }
2812 
2813     // Find the newly open FDs, if any.
2814     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
2815     std::unique_ptr<std::set<int>> current_fds = GetOpenFds(fail_fn);
2816     auto difference = std::make_unique<std::set<int>>();
2817     std::set_difference(current_fds->begin(), current_fds->end(), gPreloadFds->begin(),
2818                         gPreloadFds->end(), std::inserter(*difference, difference->end()));
2819     delete gPreloadFds;
2820     gPreloadFds = difference.release();
2821     gPreloadFdsExtracted = true;
2822 }
2823 
2824 static const JNINativeMethod gMethods[] = {
2825         {"nativeForkAndSpecialize",
2826          "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/"
2827          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZ)I",
2828          (void*)com_android_internal_os_Zygote_nativeForkAndSpecialize},
2829         {"nativeForkSystemServer", "(II[II[[IJJ)I",
2830          (void*)com_android_internal_os_Zygote_nativeForkSystemServer},
2831         {"nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
2832          (void*)com_android_internal_os_Zygote_nativeAllowFileAcrossFork},
2833         {"nativePreApplicationInit", "()V",
2834          (void*)com_android_internal_os_Zygote_nativePreApplicationInit},
2835         {"nativeInstallSeccompUidGidFilter", "(II)V",
2836          (void*)com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter},
2837         {"nativeForkApp", "(II[IZZ)I", (void*)com_android_internal_os_Zygote_nativeForkApp},
2838         // @CriticalNative
2839         {"nativeAddUsapTableEntry", "(II)V",
2840          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
2841         {"nativeSpecializeAppProcess",
2842          "(II[II[[IILjava/lang/String;Ljava/lang/String;ZLjava/lang/String;Ljava/lang/"
2843          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZ)V",
2844          (void*)com_android_internal_os_Zygote_nativeSpecializeAppProcess},
2845         {"nativeInitNativeState", "(Z)V",
2846          (void*)com_android_internal_os_Zygote_nativeInitNativeState},
2847         {"nativeGetUsapPipeFDs", "()[I",
2848          (void*)com_android_internal_os_Zygote_nativeGetUsapPipeFDs},
2849         // @CriticalNative
2850         {"nativeAddUsapTableEntry", "(II)V",
2851          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
2852         // @CriticalNative
2853         {"nativeRemoveUsapTableEntry", "(I)Z",
2854          (void*)com_android_internal_os_Zygote_nativeRemoveUsapTableEntry},
2855         {"nativeGetUsapPoolEventFD", "()I",
2856          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolEventFD},
2857         {"nativeGetUsapPoolCount", "()I",
2858          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolCount},
2859         {"nativeEmptyUsapPool", "()V", (void*)com_android_internal_os_Zygote_nativeEmptyUsapPool},
2860         {"nativeBlockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeBlockSigTerm},
2861         {"nativeUnblockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeUnblockSigTerm},
2862         {"nativeBoostUsapPriority", "()V",
2863          (void*)com_android_internal_os_Zygote_nativeBoostUsapPriority},
2864         {"nativeParseSigChld", "([BI[I)I",
2865          (void*)com_android_internal_os_Zygote_nativeParseSigChld},
2866         {"nativeSupportsMemoryTagging", "()Z",
2867          (void*)com_android_internal_os_Zygote_nativeSupportsMemoryTagging},
2868         {"nativeSupportsTaggedPointers", "()Z",
2869          (void*)com_android_internal_os_Zygote_nativeSupportsTaggedPointers},
2870         {"nativeCurrentTaggingLevel", "()I",
2871          (void*)com_android_internal_os_Zygote_nativeCurrentTaggingLevel},
2872         {"nativeMarkOpenedFilesBeforePreload", "()V",
2873          (void*)com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload},
2874         {"nativeAllowFilesOpenedByPreload", "()V",
2875          (void*)com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload},
2876 };
2877 
register_com_android_internal_os_Zygote(JNIEnv * env)2878 int register_com_android_internal_os_Zygote(JNIEnv* env) {
2879   gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
2880   gCallPostForkSystemServerHooks = GetStaticMethodIDOrDie(env, gZygoteClass,
2881                                                           "callPostForkSystemServerHooks",
2882                                                           "(I)V");
2883   gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
2884                                                    "(IZZLjava/lang/String;)V");
2885 
2886   gZygoteInitClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteInitClassName));
2887   gGetOrCreateSystemServerClassLoader =
2888           GetStaticMethodIDOrDie(env, gZygoteInitClass, "getOrCreateSystemServerClassLoader",
2889                                  "()Ljava/lang/ClassLoader;");
2890   gPrefetchStandaloneSystemServerJars =
2891           GetStaticMethodIDOrDie(env, gZygoteInitClass, "prefetchStandaloneSystemServerJars",
2892                                  "()V");
2893 
2894   RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
2895 
2896   return JNI_OK;
2897 }
2898 }  // namespace android
2899