• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Zygote"
18 #define ATRACE_TAG ATRACE_TAG_DALVIK
19 
20 #include "com_android_internal_os_Zygote.h"
21 
22 #include <algorithm>
23 #include <array>
24 #include <atomic>
25 #include <functional>
26 #include <iterator>
27 #include <list>
28 #include <optional>
29 #include <sstream>
30 #include <string>
31 #include <string_view>
32 #include <unordered_set>
33 
34 #include <android/fdsan.h>
35 #include <arpa/inet.h>
36 #include <dirent.h>
37 #include <fcntl.h>
38 #include <grp.h>
39 #include <inttypes.h>
40 #include <malloc.h>
41 #include <mntent.h>
42 #include <signal.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <sys/capability.h>
46 #include <sys/eventfd.h>
47 #include <sys/mount.h>
48 #include <sys/personality.h>
49 #include <sys/prctl.h>
50 #include <sys/resource.h>
51 #include <sys/socket.h>
52 #include <sys/stat.h>
53 #include <sys/system_properties.h>
54 #include <sys/time.h>
55 #include <sys/types.h>
56 #include <sys/un.h>
57 #include <sys/wait.h>
58 #include <unistd.h>
59 
60 #include <async_safe/log.h>
61 #include <android-base/file.h>
62 #include <android-base/logging.h>
63 #include <android-base/properties.h>
64 #include <android-base/stringprintf.h>
65 #include <android-base/unique_fd.h>
66 #include <bionic/malloc.h>
67 #include <bionic/mte.h>
68 #include <cutils/fs.h>
69 #include <cutils/multiuser.h>
70 #include <cutils/sockets.h>
71 #include <private/android_filesystem_config.h>
72 #include <processgroup/processgroup.h>
73 #include <processgroup/sched_policy.h>
74 #include <seccomp_policy.h>
75 #include <selinux/android.h>
76 #include <stats_socket.h>
77 #include <utils/String8.h>
78 #include <utils/Trace.h>
79 
80 #include <nativehelper/JNIHelp.h>
81 #include <nativehelper/ScopedLocalRef.h>
82 #include <nativehelper/ScopedPrimitiveArray.h>
83 #include <nativehelper/ScopedUtfChars.h>
84 #include "core_jni_helpers.h"
85 #include "fd_utils.h"
86 #include "filesystem_utils.h"
87 
88 #include "nativebridge/native_bridge.h"
89 
90 #if defined(__BIONIC__)
91 #include <android/dlext_private.h>
92 extern "C" void android_reset_stack_guards();
93 #endif
94 
95 namespace {
96 
97 // TODO (chriswailes): Add a function to initialize native Zygote data.
98 // TODO (chriswailes): Fix mixed indentation style (2 and 4 spaces).
99 
100 using namespace std::placeholders;
101 
102 using android::String8;
103 using android::base::ReadFileToString;
104 using android::base::StringAppendF;
105 using android::base::StringPrintf;
106 using android::base::WriteStringToFile;
107 using android::base::GetBoolProperty;
108 
109 using android::zygote::ZygoteFailure;
110 
111 using Mode = android_mallopt_gwp_asan_options_t::Mode;
112 
// Failure-callback type taken by the helpers in this file: a const reference
// to a callable that receives the error message.
// This type is duplicated in fd_utils.h
using fail_fn_t = const std::function<void(std::string)>&;
115 
116 static pid_t gSystemServerPid = 0;
117 
118 static constexpr const char* kVoldAppDataIsolation = "persist.sys.vold_app_data_isolation_enabled";
119 static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
120 static jclass gZygoteClass;
121 static jmethodID gCallPostForkSystemServerHooks;
122 static jmethodID gCallPostForkChildHooks;
123 
124 static constexpr const char* kZygoteInitClassName = "com/android/internal/os/ZygoteInit";
125 static jclass gZygoteInitClass;
126 static jmethodID gGetOrCreateSystemServerClassLoader;
127 static jmethodID gPrefetchStandaloneSystemServerJars;
128 
129 static bool gIsSecurityEnforced = true;
130 
131 /**
132  * True if the app process is running in its mount namespace.
133  */
134 static bool gInAppMountNamespace = false;
135 
136 /**
137  * The maximum number of characters (not including a null terminator) that a
138  * process name may contain.
139  */
140 static constexpr size_t MAX_NAME_LENGTH = 15;
141 
142 /**
143  * The file descriptor for the Zygote socket opened by init.
144  */
145 
146 static int gZygoteSocketFD = -1;
147 
148 /**
149  * The file descriptor for the unspecialized app process (USAP) pool socket opened by init.
150  */
151 
152 static int gUsapPoolSocketFD = -1;
153 
154 /**
155  * The number of USAPs currently in this Zygote's pool.
156  */
157 static std::atomic_uint32_t gUsapPoolCount = 0;
158 
159 /**
160  * Event file descriptor used to communicate reaped USAPs to the
161  * ZygoteServer.
162  */
163 static int gUsapPoolEventFD = -1;
164 
165 /**
166  * The socket file descriptor used to send notifications to the
167  * system_server.
168  */
169 static int gSystemServerSocketFd = -1;
170 
/** Octal permission bits 0751. */
static constexpr int DEFAULT_DATA_DIR_PERMISSION = 0751;

/** Masks selecting the upper and lower 32 bits of a 64-bit word. */
static constexpr uint64_t UPPER_HALF_WORD_MASK = 0xFFFF'FFFF'0000'0000;
static constexpr uint64_t LOWER_HALF_WORD_MASK = 0x0000'0000'FFFF'FFFF;

static constexpr const char* kCurProfileDirPath = "/data/misc/profiles/cur";
static constexpr const char* kRefProfileDirPath = "/data/misc/profiles/ref";

/**
 * The maximum value that the gUSAPPoolSizeMax variable may take.  This value
 * is a mirror of ZygoteServer.USAP_POOL_SIZE_MAX_LIMIT
 */
static constexpr int USAP_POOL_SIZE_MAX_LIMIT = 100;

/** The numeric value for the maximum priority a process may possess. */
static constexpr int PROCESS_PRIORITY_MAX = -20;

/** The numeric value for the minimum priority a process may possess. */
static constexpr int PROCESS_PRIORITY_MIN = 19;

/** The numeric value for the normal priority a process should have. */
static constexpr int PROCESS_PRIORITY_DEFAULT = 0;

/** Exponential back off parameters for storage dir check. */
static constexpr unsigned int STORAGE_DIR_CHECK_RETRY_MULTIPLIER = 2;
static constexpr unsigned int STORAGE_DIR_CHECK_INIT_INTERVAL_US = 50;
static constexpr unsigned int STORAGE_DIR_CHECK_MAX_INTERVAL_US = 1000;

/**
 * Time budget, in microseconds, for the storage dir check: 5 minutes.
 * If the check exceeds 2s, PROC_START_TIMEOUT_MSG will kill the starting app
 * anyway, so a generous budget is fine here.
 */
static constexpr int STORAGE_DIR_CHECK_TIMEOUT_US = 5 * 60 * 1000 * 1000;
204 
205 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn);
206 
/**
 * A helper class containing accounting information for USAPs.
 *
 * Each entry stores a (pid, read_pipe_fd) pair in a single lock-free atomic,
 * because entries are accessed from the signal handler.  An entry holding
 * {-1, -1} is considered invalid (unused).
 */
class UsapTableEntry {
 public:
  struct EntryStorage {
    int32_t pid;
    int32_t read_pipe_fd;

    bool operator!=(const EntryStorage& other) const {
      return pid != other.pid || read_pipe_fd != other.read_pipe_fd;
    }
  };

 private:
  static constexpr EntryStorage INVALID_ENTRY_VALUE = {-1, -1};

  std::atomic<EntryStorage> mStorage;
  static_assert(decltype(mStorage)::is_always_lock_free);  // Accessed from signal handler.

 public:
  constexpr UsapTableEntry() : mStorage(INVALID_ENTRY_VALUE) {}

  /**
   * If the provided PID matches the one stored in this entry, the entry will
   * be invalidated and the associated file descriptor will be closed.  If the
   * PIDs don't match nothing will happen.
   *
   * @param pid The ID of the process whose entry we want to clear.
   * @return True if the entry was cleared by this call; false otherwise
   */
  bool ClearForPID(int32_t pid) {
    EntryStorage storage = mStorage.load();

    if (storage.pid != pid) {
      return false;
    }

    /*
     * There are three possible outcomes from this compare-and-exchange:
     *   1) It succeeds, in which case we close the FD
     *   2) It fails and the new value is INVALID_ENTRY_VALUE, in which case
     *      the entry has already been cleared.
     *   3) It fails and the new value isn't INVALID_ENTRY_VALUE, in which
     *      case the entry has already been cleared and re-used.
     *
     * In all three cases the goal of the caller has been met, but only in
     * the first case do we need to decrement the pool count.
     */
    if (!mStorage.compare_exchange_strong(storage, INVALID_ENTRY_VALUE)) {
      return false;
    }

    close(storage.read_pipe_fd);
    return true;
  }

  /**
   * Invalidates the entry and closes its file descriptor if it was valid.
   *
   * The previous value is taken with a single atomic exchange so that, if
   * ClearForPID() runs concurrently, exactly one of the two observes the valid
   * value and closes the descriptor.
   */
  void Clear() {
    const EntryStorage previous = mStorage.exchange(INVALID_ENTRY_VALUE);

    if (previous != INVALID_ENTRY_VALUE) {
      close(previous.read_pipe_fd);
    }
  }

  /**
   * Marks the entry invalid without closing the stored file descriptor.
   */
  void Invalidate() {
    mStorage.store(INVALID_ENTRY_VALUE);
  }

  /**
   * @return A copy of the data stored in this entry, or std::nullopt if the
   * entry is currently invalid.
   */
  std::optional<EntryStorage> GetValues() const {
    const EntryStorage storage = mStorage.load();

    if (storage != INVALID_ENTRY_VALUE) {
      return storage;
    }
    return std::nullopt;
  }

  /**
   * Sets the entry to the given values if it is currently invalid.
   *
   * @param pid  The process ID for the new entry.
   * @param read_pipe_fd  The read end of the USAP control pipe for this
   * process.
   * @return True if the entry was set; false otherwise.
   */
  bool SetIfInvalid(int32_t pid, int32_t read_pipe_fd) {
    const EntryStorage new_value_storage = {pid, read_pipe_fd};
    EntryStorage expected = INVALID_ENTRY_VALUE;

    return mStorage.compare_exchange_strong(expected, new_value_storage);
  }
};
310 
311 /**
312  * A table containing information about the USAPs currently in the pool.
313  *
314  * Multiple threads may be attempting to modify the table, either from the
315  * signal handler or from the ZygoteServer poll loop.  Atomic loads/stores in
316  * the USAPTableEntry class prevent data races during these concurrent
317  * operations.
318  */
319 static std::array<UsapTableEntry, USAP_POOL_SIZE_MAX_LIMIT> gUsapTable;
320 
321 /**
322  * The list of open zygote file descriptors.
323  */
324 static FileDescriptorTable* gOpenFdTable = nullptr;
325 
// External-storage mount modes.
// Must match values in com.android.internal.os.Zygote.
// The values should be consistent with IVold.aidl
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,
  MOUNT_EXTERNAL_DEFAULT = 1,
  MOUNT_EXTERNAL_INSTALLER = 2,
  MOUNT_EXTERNAL_PASS_THROUGH = 3,
  MOUNT_EXTERNAL_ANDROID_WRITABLE = 4,
  // Number of modes above; MountEmulatedStorage rejects any mode >= this value.
  MOUNT_EXTERNAL_COUNT = 5
};
336 
// Bit flags and multi-bit level fields packed into a 32-bit word.
// Must match values in com.android.internal.os.Zygote.
enum RuntimeFlags : uint32_t {
  DEBUG_ENABLE_JDWP = 1u << 0,
  PROFILE_SYSTEM_SERVER = 1u << 14,
  PROFILE_FROM_SHELL = 1u << 15,

  // Two-bit memory tagging level field occupying bits 19-20.
  MEMORY_TAG_LEVEL_MASK = 3u << 19,
  MEMORY_TAG_LEVEL_TBI = 1u << 19,
  MEMORY_TAG_LEVEL_ASYNC = 2u << 19,
  MEMORY_TAG_LEVEL_SYNC = 3u << 19,

  // Two-bit GWP-ASan level field occupying bits 21-22.
  GWP_ASAN_LEVEL_MASK = 3u << 21,
  GWP_ASAN_LEVEL_NEVER = 0u << 21,
  GWP_ASAN_LEVEL_LOTTERY = 1u << 21,
  GWP_ASAN_LEVEL_ALWAYS = 2u << 21,
  GWP_ASAN_LEVEL_DEFAULT = 3u << 21,

  NATIVE_HEAP_ZERO_INIT_ENABLED = 1u << 23,
  PROFILEABLE = 1u << 24,
  DEBUG_ENABLE_PTRACE = 1u << 25,
  ENABLE_PAGE_SIZE_APP_COMPAT = 1u << 26,
};
356 
// Type tag carried in the header of an unsolicited zygote message.
enum UnsolicitedZygoteMessageTypes : uint32_t {
  UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED = 0,
  UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD = 1,
};

// Message built by sendSigChildStatus: a type header followed by the pid, uid
// and wait status of a reaped child.
struct UnsolicitedZygoteMessageSigChld {
  struct {
    UnsolicitedZygoteMessageTypes type;
  } header;
  struct {
    pid_t pid;
    uid_t uid;
    int status;
  } payload;
};
372 
// Destination address for the datagrams written by sendSigChildStatus.
// Keep sync with services/core/java/com/android/server/am/ProcessList.java
static constexpr struct sockaddr_un kSystemServerSockAddr = {
        .sun_family = AF_LOCAL,
        .sun_path = "/data/system/unsolzygotesocket",
};

// Forward declaration so we don't have to move the signal handler.
static bool RemoveUsapTableEntry(pid_t usap_pid);
379 
RuntimeAbort(JNIEnv * env,int line,const char * msg)380 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
381   std::ostringstream oss;
382   oss << __FILE__ << ":" << line << ": " << msg;
383   env->FatalError(oss.str().c_str());
384 }
385 
386 // Create the socket which is going to be used to send unsolicited message
387 // to system_server, the socket will be closed post forking a child process.
388 // It's expected to be called at each zygote's initialization.
initUnsolSocketToSystemServer()389 static void initUnsolSocketToSystemServer() {
390     gSystemServerSocketFd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_NONBLOCK, 0);
391     if (gSystemServerSocketFd >= 0) {
392         ALOGV("Zygote:systemServerSocketFD = %d", gSystemServerSocketFd);
393     } else {
394         ALOGE("Unable to create socket file descriptor to connect to system_server");
395     }
396 }
397 
sendSigChildStatus(const pid_t pid,const uid_t uid,const int status)398 static void sendSigChildStatus(const pid_t pid, const uid_t uid, const int status) {
399     int socketFd = gSystemServerSocketFd;
400     if (socketFd >= 0) {
401         // fill the message buffer
402         struct UnsolicitedZygoteMessageSigChld data =
403                 {.header = {.type = UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD},
404                  .payload = {.pid = pid, .uid = uid, .status = status}};
405         if (TEMP_FAILURE_RETRY(
406                     sendto(socketFd, &data, sizeof(data), 0,
407                            reinterpret_cast<const struct sockaddr*>(&kSystemServerSockAddr),
408                            sizeof(kSystemServerSockAddr))) == -1) {
409             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
410                                   "Zygote failed to write to system_server FD: %s",
411                                   strerror(errno));
412         }
413     }
414 }
415 
416 // This signal handler is for zygote mode, since the zygote must reap its children
417 NO_STACK_PROTECTOR
SigChldHandler(int,siginfo_t * info,void *)418 static void SigChldHandler(int /*signal_number*/, siginfo_t* info, void* /*ucontext*/) {
419     pid_t pid;
420     int status;
421     int64_t usaps_removed = 0;
422 
423     // It's necessary to save and restore the errno during this function.
424     // Since errno is stored per thread, changing it here modifies the errno
425     // on the thread on which this signal handler executes. If a signal occurs
426     // between a call and an errno check, it's possible to get the errno set
427     // here.
428     // See b/23572286 for extra information.
429     int saved_errno = errno;
430 
431     while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
432         // Notify system_server that we received a SIGCHLD
433         sendSigChildStatus(pid, info->si_uid, status);
434         // Log process-death status that we care about.
435         if (WIFEXITED(status)) {
436             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG, "Process %d exited cleanly (%d)", pid,
437                                   WEXITSTATUS(status));
438 
439             // Check to see if the PID is in the USAP pool and remove it if it is.
440             if (RemoveUsapTableEntry(pid)) {
441                 ++usaps_removed;
442             }
443         } else if (WIFSIGNALED(status)) {
444             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG,
445                                   "Process %d exited due to signal %d (%s)%s", pid,
446                                   WTERMSIG(status), strsignal(WTERMSIG(status)),
447                                   WCOREDUMP(status) ? "; core dumped" : "");
448 
449             // If the process exited due to a signal other than SIGTERM, check to see
450             // if the PID is in the USAP pool and remove it if it is.  If the process
451             // was closed by the Zygote using SIGTERM then the USAP pool entry will
452             // have already been removed (see nativeEmptyUsapPool()).
453             if (WTERMSIG(status) != SIGTERM && RemoveUsapTableEntry(pid)) {
454                 ++usaps_removed;
455             }
456         }
457 
458         // If the just-crashed process is the system_server, bring down zygote
459         // so that it is restarted by init and system server will be restarted
460         // from there.
461         if (pid == gSystemServerPid) {
462             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
463                                   "Exit zygote because system server (pid %d) has terminated", pid);
464             kill(getpid(), SIGKILL);
465         }
466     }
467 
468     // Note that we shouldn't consider ECHILD an error because
469     // the secondary zygote might have no children left to wait for.
470     if (pid < 0 && errno != ECHILD) {
471         async_safe_format_log(ANDROID_LOG_WARN, LOG_TAG, "Zygote SIGCHLD error in waitpid: %s",
472                               strerror(errno));
473     }
474 
475     if (usaps_removed > 0) {
476         if (TEMP_FAILURE_RETRY(write(gUsapPoolEventFD, &usaps_removed, sizeof(usaps_removed))) ==
477             -1) {
478             // If this write fails something went terribly wrong.  We will now kill
479             // the zygote and let the system bring it back up.
480             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
481                                   "Zygote failed to write to USAP pool event FD: %s",
482                                   strerror(errno));
483             kill(getpid(), SIGKILL);
484         }
485     }
486 
487     errno = saved_errno;
488 }
489 
490 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
491 // configured very late, because earlier in the runtime we may fork() and
492 // exec() other processes, and we want to waitpid() for those rather than
493 // have them be harvested immediately.
494 //
495 // Ignore SIGHUP because all processes forked by the zygote are in the same
496 // process group as the zygote and we don't want to be notified if we become
497 // an orphaned group and have one or more stopped processes. This is not a
498 // theoretical concern :
499 // - we can become an orphaned group if one of our direct descendants forks
500 //   and is subsequently killed before its children.
501 // - crash_dump routinely STOPs the process it's tracing.
502 //
503 // See issues b/71965619 and b/25567761 for further details.
504 //
505 // This ends up being called repeatedly before each fork(), but there's
506 // no real harm in that.
SetSignalHandlers()507 static void SetSignalHandlers() {
508     struct sigaction sig_chld = {.sa_flags = SA_SIGINFO, .sa_sigaction = SigChldHandler};
509 
510     if (sigaction(SIGCHLD, &sig_chld, nullptr) < 0) {
511         ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
512     }
513 
514   struct sigaction sig_hup = {};
515   sig_hup.sa_handler = SIG_IGN;
516   if (sigaction(SIGHUP, &sig_hup, nullptr) < 0) {
517     ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
518   }
519 }
520 
521 // Sets the SIGCHLD handler back to default behavior in zygote children.
UnsetChldSignalHandler()522 static void UnsetChldSignalHandler() {
523   struct sigaction sa;
524   memset(&sa, 0, sizeof(sa));
525   sa.sa_handler = SIG_DFL;
526 
527   if (sigaction(SIGCHLD, &sa, nullptr) < 0) {
528     ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
529   }
530 }
531 
532 // Calls POSIX setgroups() using the int[] object as an argument.
533 // A nullptr argument is tolerated.
SetGids(JNIEnv * env,jintArray managed_gids,jboolean is_child_zygote,fail_fn_t fail_fn)534 static void SetGids(JNIEnv* env, jintArray managed_gids, jboolean is_child_zygote,
535                     fail_fn_t fail_fn) {
536   if (managed_gids == nullptr) {
537     if (is_child_zygote) {
538       // For child zygotes like webview and app zygote, we want to clear out
539       // any supplemental groups the parent zygote had.
540       if (setgroups(0, NULL) == -1) {
541         fail_fn(CREATE_ERROR("Failed to remove supplementary groups for child zygote"));
542       }
543     }
544     return;
545   }
546 
547   ScopedIntArrayRO gids(env, managed_gids);
548   if (gids.get() == nullptr) {
549     fail_fn(CREATE_ERROR("Getting gids int array failed"));
550   }
551 
552   if (setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])) == -1) {
553     fail_fn(CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size()));
554   }
555 }
556 
ensureInAppMountNamespace(fail_fn_t fail_fn)557 static void ensureInAppMountNamespace(fail_fn_t fail_fn) {
558   if (gInAppMountNamespace) {
559     // In app mount namespace already
560     return;
561   }
562   if (unshare(CLONE_NEWNS) == -1) {
563     fail_fn(CREATE_ERROR("Failed to unshare(): %s", strerror(errno)));
564   }
565   gInAppMountNamespace = true;
566 }
567 
568 // Sets the resource limits via setrlimit(2) for the values in the
569 // two-dimensional array of integers that's passed in. The second dimension
570 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). nullptr is
571 // treated as an empty array.
SetRLimits(JNIEnv * env,jobjectArray managed_rlimits,fail_fn_t fail_fn)572 static void SetRLimits(JNIEnv* env, jobjectArray managed_rlimits, fail_fn_t fail_fn) {
573   if (managed_rlimits == nullptr) {
574     return;
575   }
576 
577   rlimit rlim;
578   memset(&rlim, 0, sizeof(rlim));
579 
580   for (int i = 0; i < env->GetArrayLength(managed_rlimits); ++i) {
581     ScopedLocalRef<jobject>
582         managed_rlimit_object(env, env->GetObjectArrayElement(managed_rlimits, i));
583     ScopedIntArrayRO rlimit_handle(env, reinterpret_cast<jintArray>(managed_rlimit_object.get()));
584 
585     if (rlimit_handle.size() != 3) {
586       fail_fn(CREATE_ERROR("rlimits array must have a second dimension of size 3"));
587     }
588 
589     rlim.rlim_cur = rlimit_handle[1];
590     rlim.rlim_max = rlimit_handle[2];
591 
592     if (setrlimit(rlimit_handle[0], &rlim) == -1) {
593       fail_fn(CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed",
594                            rlimit_handle[0], rlim.rlim_cur, rlim.rlim_max));
595     }
596   }
597 }
598 
EnableDebugger()599 static void EnableDebugger() {
600   // To let a non-privileged gdbserver attach to this
601   // process, we must set our dumpable flag.
602   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
603     ALOGE("prctl(PR_SET_DUMPABLE) failed");
604   }
605 
606   // A non-privileged native debugger should be able to attach to the debuggable app, even if Yama
607   // is enabled (see kernel/Documentation/security/Yama.txt).
608   if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
609     // if Yama is off prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
610     // case since it's expected behaviour.
611     if (errno != EINVAL) {
612       ALOGE("prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed");
613     }
614   }
615 
616   // Set the core dump size to zero unless wanted (see also coredump_setup in build/envsetup.sh).
617   if (!GetBoolProperty("persist.zygote.core_dump", false)) {
618     // Set the soft limit on core dump size to 0 without changing the hard limit.
619     rlimit rl;
620     if (getrlimit(RLIMIT_CORE, &rl) == -1) {
621       ALOGE("getrlimit(RLIMIT_CORE) failed");
622     } else {
623       rl.rlim_cur = 0;
624       if (setrlimit(RLIMIT_CORE, &rl) == -1) {
625         ALOGE("setrlimit(RLIMIT_CORE) failed");
626       }
627     }
628   }
629 }
630 
PreApplicationInit()631 static void PreApplicationInit() {
632   // The child process sets this to indicate it's not the zygote.
633   android_mallopt(M_SET_ZYGOTE_CHILD, nullptr, 0);
634 
635   // Set the jemalloc decay time to 1.
636   mallopt(M_DECAY_TIME, 1);
637 }
638 
SetUpSeccompFilter(uid_t uid,bool is_child_zygote)639 static void SetUpSeccompFilter(uid_t uid, bool is_child_zygote) {
640   if (!gIsSecurityEnforced) {
641     ALOGI("seccomp disabled by setenforce 0");
642     return;
643   }
644 
645   // Apply system or app filter based on uid.
646   if (uid >= AID_APP_START) {
647     if (is_child_zygote) {
648       set_app_zygote_seccomp_filter();
649     } else {
650       set_app_seccomp_filter();
651     }
652   } else {
653     set_system_seccomp_filter();
654   }
655 }
656 
EnableKeepCapabilities(fail_fn_t fail_fn)657 static void EnableKeepCapabilities(fail_fn_t fail_fn) {
658   if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) {
659     fail_fn(CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno)));
660   }
661 }
662 
DropCapabilitiesBoundingSet(fail_fn_t fail_fn,jlong bounding_capabilities)663 static void DropCapabilitiesBoundingSet(fail_fn_t fail_fn, jlong bounding_capabilities) {
664   for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {;
665     if ((1LL << i) & bounding_capabilities) continue;
666     if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) == -1) {
667       if (errno == EINVAL) {
668         ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
669               "your kernel is compiled with file capabilities support");
670       } else {
671         fail_fn(CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno)));
672       }
673     }
674   }
675 }
676 
MatchGid(JNIEnv * env,jintArray gids,jint gid,jint gid_to_find)677 static bool MatchGid(JNIEnv* env, jintArray gids, jint gid, jint gid_to_find) {
678   if (gid == gid_to_find) return true;
679 
680   if (gids == nullptr) return false;
681 
682   jsize gids_num = env->GetArrayLength(gids);
683   ScopedIntArrayRO native_gid_proxy(env, gids);
684 
685   if (native_gid_proxy.get() == nullptr) {
686     RuntimeAbort(env, __LINE__, "Bad gids array");
687   }
688 
689   for (int gids_index = 0; gids_index < gids_num; ++gids_index) {
690     if (native_gid_proxy[gids_index] == gid_to_find) {
691       return true;
692     }
693   }
694 
695   return false;
696 }
697 
SetInheritable(uint64_t inheritable,fail_fn_t fail_fn)698 static void SetInheritable(uint64_t inheritable, fail_fn_t fail_fn) {
699   __user_cap_header_struct capheader;
700   memset(&capheader, 0, sizeof(capheader));
701   capheader.version = _LINUX_CAPABILITY_VERSION_3;
702   capheader.pid = 0;
703 
704   __user_cap_data_struct capdata[2];
705   if (capget(&capheader, &capdata[0]) == -1) {
706     fail_fn(CREATE_ERROR("capget failed: %s", strerror(errno)));
707   }
708 
709   capdata[0].inheritable = inheritable;
710   capdata[1].inheritable = inheritable >> 32;
711 
712   if (capset(&capheader, &capdata[0]) == -1) {
713     fail_fn(CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno)));
714   }
715 }
716 
SetCapabilities(uint64_t permitted,uint64_t effective,uint64_t inheritable,fail_fn_t fail_fn)717 static void SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
718                             fail_fn_t fail_fn) {
719   __user_cap_header_struct capheader;
720   memset(&capheader, 0, sizeof(capheader));
721   capheader.version = _LINUX_CAPABILITY_VERSION_3;
722   capheader.pid = 0;
723 
724   __user_cap_data_struct capdata[2];
725   memset(&capdata, 0, sizeof(capdata));
726   capdata[0].effective = effective;
727   capdata[1].effective = effective >> 32;
728   capdata[0].permitted = permitted;
729   capdata[1].permitted = permitted >> 32;
730   capdata[0].inheritable = inheritable;
731   capdata[1].inheritable = inheritable >> 32;
732 
733   if (capset(&capheader, &capdata[0]) == -1) {
734     fail_fn(CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
735                          "failed: %s", permitted, effective, inheritable, strerror(errno)));
736   }
737 }
738 
SetSchedulerPolicy(fail_fn_t fail_fn,bool is_top_app)739 static void SetSchedulerPolicy(fail_fn_t fail_fn, bool is_top_app) {
740   SchedPolicy policy = is_top_app ? SP_TOP_APP : SP_DEFAULT;
741 
742   if (is_top_app && cpusets_enabled()) {
743     errno = -set_cpuset_policy(0, policy);
744     if (errno != 0) {
745       fail_fn(CREATE_ERROR("set_cpuset_policy(0, %d) failed: %s", policy, strerror(errno)));
746     }
747   }
748 
749   errno = -set_sched_policy(0, policy);
750   if (errno != 0) {
751     fail_fn(CREATE_ERROR("set_sched_policy(0, %d) failed: %s", policy, strerror(errno)));
752   }
753 
754   // We are going to lose the permission to set scheduler policy during the specialization, so make
755   // sure that we don't cache the fd of cgroup path that may cause sepolicy violation by writing
756   // value to the cached fd directly when creating new thread.
757   DropTaskProfilesResourceCaching();
758 }
759 
UnmountTree(const char * path)760 static int UnmountTree(const char* path) {
761   ATRACE_CALL();
762 
763   size_t path_len = strlen(path);
764 
765   FILE* fp = setmntent("/proc/mounts", "r");
766   if (fp == nullptr) {
767     ALOGE("Error opening /proc/mounts: %s", strerror(errno));
768     return -errno;
769   }
770 
771   // Some volumes can be stacked on each other, so force unmount in
772   // reverse order to give us the best chance of success.
773   std::list<std::string> to_unmount;
774   mntent* mentry;
775   while ((mentry = getmntent(fp)) != nullptr) {
776     if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
777       to_unmount.push_front(std::string(mentry->mnt_dir));
778     }
779   }
780   endmntent(fp);
781 
782   for (const auto& path : to_unmount) {
783     if (umount2(path.c_str(), MNT_DETACH)) {
784       ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
785     }
786   }
787   return 0;
788 }
789 
PrepareDir(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)790 static void PrepareDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
791                       fail_fn_t fail_fn) {
792   if (fs_prepare_dir(dir.c_str(), mode, uid, gid) != 0) {
793     fail_fn(CREATE_ERROR("fs_prepare_dir failed on %s: %s",
794                          dir.c_str(), strerror(errno)));
795   }
796 }
797 
PrepareDirIfNotPresent(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)798 static void PrepareDirIfNotPresent(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
799                       fail_fn_t fail_fn) {
800   struct stat sb;
801   if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &sb)) != -1) {
802     // Directory exists already
803     return;
804   }
805   PrepareDir(dir, mode, uid, gid, fail_fn);
806 }
807 
// Recursively bind-mounts source_dir onto target_dir (MS_BIND | MS_REC).
// Returns true on success and false if mount() failed.
static bool BindMount(const std::string& source_dir, const std::string& target_dir) {
  const int rc = TEMP_FAILURE_RETRY(
      mount(source_dir.c_str(), target_dir.c_str(), nullptr, MS_BIND | MS_REC, nullptr));
  return rc != -1;
}
812 
BindMount(const std::string & source_dir,const std::string & target_dir,fail_fn_t fail_fn)813 static void BindMount(const std::string& source_dir, const std::string& target_dir,
814                       fail_fn_t fail_fn) {
815   if (!BindMount(source_dir, target_dir)) {
816     fail_fn(CREATE_ERROR("Failed to mount %s to %s: %s",
817                          source_dir.c_str(), target_dir.c_str(), strerror(errno)));
818   }
819 }
820 
MountAppDataTmpFs(const std::string & target_dir,fail_fn_t fail_fn)821 static void MountAppDataTmpFs(const std::string& target_dir,
822                       fail_fn_t fail_fn) {
823   if (TEMP_FAILURE_RETRY(mount("tmpfs", target_dir.c_str(), "tmpfs",
824                                MS_NOSUID | MS_NODEV | MS_NOEXEC, "uid=0,gid=0,mode=0751")) == -1) {
825     fail_fn(CREATE_ERROR("Failed to mount tmpfs to %s: %s",
826                          target_dir.c_str(), strerror(errno)));
827   }
828 }
829 
830 // Create a private mount namespace and bind mount appropriate emulated
831 // storage for the given user.
MountEmulatedStorage(uid_t uid,jint mount_mode,bool force_mount_namespace,fail_fn_t fail_fn)832 static void MountEmulatedStorage(uid_t uid, jint mount_mode,
833         bool force_mount_namespace,
834         fail_fn_t fail_fn) {
835   // See storage config details at http://source.android.com/tech/storage/
836   ATRACE_CALL();
837 
838   if (mount_mode < 0 || mount_mode >= MOUNT_EXTERNAL_COUNT) {
839     fail_fn(CREATE_ERROR("Unknown mount_mode: %d", mount_mode));
840   }
841 
842   if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
843     // Valid default of no storage visible
844     return;
845   }
846 
847   // Create a second private mount namespace for our process
848   ensureInAppMountNamespace(fail_fn);
849 
850   // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
851   if (mount_mode == MOUNT_EXTERNAL_NONE) {
852     return;
853   }
854 
855   const userid_t user_id = multiuser_get_user_id(uid);
856   const std::string user_source = StringPrintf("/mnt/user/%d", user_id);
857   // Shell is neither AID_ROOT nor AID_EVERYBODY. Since it equally needs 'execute' access to
858   // /mnt/user/0 to 'adb shell ls /sdcard' for instance, we set the uid bit of /mnt/user/0 to
859   // AID_SHELL. This gives shell access along with apps running as group everybody (user 0 apps)
860   // These bits should be consistent with what is set in vold in
861   // Utils#MountUserFuse on FUSE volume mount
862   PrepareDir(user_source, 0710, user_id ? AID_ROOT : AID_SHELL,
863              multiuser_get_uid(user_id, AID_EVERYBODY), fail_fn);
864 
865   bool isAppDataIsolationEnabled = GetBoolProperty(kVoldAppDataIsolation, false);
866 
867   if (mount_mode == MOUNT_EXTERNAL_PASS_THROUGH) {
868       const std::string pass_through_source = StringPrintf("/mnt/pass_through/%d", user_id);
869       PrepareDir(pass_through_source, 0710, AID_ROOT, AID_MEDIA_RW, fail_fn);
870       BindMount(pass_through_source, "/storage", fail_fn);
871   } else if (mount_mode == MOUNT_EXTERNAL_INSTALLER) {
872       const std::string installer_source = StringPrintf("/mnt/installer/%d", user_id);
873       BindMount(installer_source, "/storage", fail_fn);
874   } else if (isAppDataIsolationEnabled && mount_mode == MOUNT_EXTERNAL_ANDROID_WRITABLE) {
875       const std::string writable_source = StringPrintf("/mnt/androidwritable/%d", user_id);
876       BindMount(writable_source, "/storage", fail_fn);
877   } else {
878       BindMount(user_source, "/storage", fail_fn);
879   }
880 }
881 
882 // Utility to close down the Zygote socket file descriptors while
883 // the child is still running as root with Zygote's privileges.  Each
884 // descriptor (if any) is closed via dup3(), replacing it with a valid
885 // (open) descriptor to /dev/null.
886 
DetachDescriptors(JNIEnv * env,const std::vector<int> & fds_to_close,fail_fn_t fail_fn)887 static void DetachDescriptors(JNIEnv* env,
888                               const std::vector<int>& fds_to_close,
889                               fail_fn_t fail_fn) {
890 
891   if (fds_to_close.size() > 0) {
892     android::base::unique_fd devnull_fd(open("/dev/null", O_RDWR | O_CLOEXEC));
893     if (devnull_fd == -1) {
894       fail_fn(std::string("Failed to open /dev/null: ").append(strerror(errno)));
895     }
896 
897     for (int fd : fds_to_close) {
898       ALOGV("Switching descriptor %d to /dev/null", fd);
899       if (TEMP_FAILURE_RETRY(dup3(devnull_fd, fd, O_CLOEXEC)) == -1) {
900         fail_fn(StringPrintf("Failed dup3() on descriptor %d: %s", fd, strerror(errno)));
901       }
902     }
903   }
904 }
905 
SetThreadName(const std::string & thread_name)906 void SetThreadName(const std::string& thread_name) {
907   bool hasAt = false;
908   bool hasDot = false;
909 
910   for (const char str_el : thread_name) {
911     if (str_el == '.') {
912       hasDot = true;
913     } else if (str_el == '@') {
914       hasAt = true;
915     }
916   }
917 
918   const char* name_start_ptr = thread_name.c_str();
919   if (thread_name.length() >= MAX_NAME_LENGTH && !hasAt && hasDot) {
920     name_start_ptr += thread_name.length() - MAX_NAME_LENGTH;
921   }
922 
923   // pthread_setname_np fails rather than truncating long strings.
924   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
925   strlcpy(buf, name_start_ptr, sizeof(buf));
926   errno = pthread_setname_np(pthread_self(), buf);
927   if (errno != 0) {
928     ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
929   }
930   // Update base::logging default tag.
931   android::base::SetDefaultTag(buf);
932 }
933 
934 /**
935  * A helper method for converting managed strings to native strings.  A fatal
936  * error is generated if a problem is encountered in extracting a non-null
937  * string.
938  *
939  * @param env  Managed runtime environment
940  * @param process_name  A native representation of the process name
941  * @param managed_process_name  A managed representation of the process name
942  * @param managed_string  The managed string to extract
943  *
944  * @return An empty option if the managed string is null.  A optional-wrapped
945  * string otherwise.
946  */
ExtractJString(JNIEnv * env,const char * process_name,jstring managed_process_name,jstring managed_string)947 static std::optional<std::string> ExtractJString(JNIEnv* env,
948                                                  const char* process_name,
949                                                  jstring managed_process_name,
950                                                  jstring managed_string) {
951   if (managed_string == nullptr) {
952     return std::nullopt;
953   } else {
954     ScopedUtfChars scoped_string_chars(env, managed_string);
955 
956     if (scoped_string_chars.c_str() != nullptr) {
957       return std::optional<std::string>(scoped_string_chars.c_str());
958     } else {
959       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JString.");
960     }
961   }
962 }
963 
964 /**
965  * A helper method for converting managed string arrays to native vectors.  A
966  * fatal error is generated if a problem is encountered in extracting a non-null array.
967  *
968  * @param env  Managed runtime environment
969  * @param process_name  A native representation of the process name
970  * @param managed_process_name  A managed representation of the process name
971  * @param managed_array  The managed integer array to extract
972  *
973  * @return An empty option if the managed array is null.  A optional-wrapped
974  * vector otherwise.
975  */
ExtractJIntArray(JNIEnv * env,const char * process_name,jstring managed_process_name,jintArray managed_array)976 static std::optional<std::vector<int>> ExtractJIntArray(JNIEnv* env,
977                                                         const char* process_name,
978                                                         jstring managed_process_name,
979                                                         jintArray managed_array) {
980   if (managed_array == nullptr) {
981     return std::nullopt;
982   } else {
983     ScopedIntArrayRO managed_array_handle(env, managed_array);
984 
985     if (managed_array_handle.get() != nullptr) {
986       std::vector<int> native_array;
987       native_array.reserve(managed_array_handle.size());
988 
989       for (size_t array_index = 0; array_index < managed_array_handle.size(); ++array_index) {
990         native_array.push_back(managed_array_handle[array_index]);
991       }
992 
993       return std::move(native_array);
994 
995     } else {
996       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JIntArray.");
997     }
998   }
999 }
1000 
1001 /**
1002  * A utility function for blocking signals.
1003  *
1004  * @param signum  Signal number to block
1005  * @param fail_fn  Fatal error reporting function
1006  *
1007  * @see ZygoteFailure
1008  */
BlockSignal(int signum,fail_fn_t fail_fn)1009 static void BlockSignal(int signum, fail_fn_t fail_fn) {
1010   sigset_t sigs;
1011   sigemptyset(&sigs);
1012   sigaddset(&sigs, signum);
1013 
1014   if (sigprocmask(SIG_BLOCK, &sigs, nullptr) == -1) {
1015     fail_fn(CREATE_ERROR("Failed to block signal %s: %s", strsignal(signum), strerror(errno)));
1016   }
1017 }
1018 
1019 
1020 /**
1021  * A utility function for unblocking signals.
1022  *
1023  * @param signum  Signal number to unblock
1024  * @param fail_fn  Fatal error reporting function
1025  *
1026  * @see ZygoteFailure
1027  */
UnblockSignal(int signum,fail_fn_t fail_fn)1028 static void UnblockSignal(int signum, fail_fn_t fail_fn) {
1029   sigset_t sigs;
1030   sigemptyset(&sigs);
1031   sigaddset(&sigs, signum);
1032 
1033   if (sigprocmask(SIG_UNBLOCK, &sigs, nullptr) == -1) {
1034     fail_fn(CREATE_ERROR("Failed to un-block signal %s: %s", strsignal(signum), strerror(errno)));
1035   }
1036 }
1037 
ClearUsapTable()1038 static void ClearUsapTable() {
1039   for (UsapTableEntry& entry : gUsapTable) {
1040     entry.Clear();
1041   }
1042 
1043   gUsapPoolCount = 0;
1044 }
1045 
1046 // Create an app data directory over tmpfs overlayed CE / DE storage, and bind mount it
1047 // from the actual app data directory in data mirror.
createAndMountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn,bool call_fail_fn)1048 static bool createAndMountAppData(std::string_view package_name,
1049     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1050     std::string_view actual_data_path, fail_fn_t fail_fn, bool call_fail_fn) {
1051 
1052   char mirrorAppDataPath[PATH_MAX];
1053   char actualAppDataPath[PATH_MAX];
1054   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1055       mirror_pkg_dir_name.data());
1056   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1057 
1058   PrepareDir(actualAppDataPath, 0700, AID_ROOT, AID_ROOT, fail_fn);
1059 
1060   // Bind mount from original app data directory in mirror.
1061   if (call_fail_fn) {
1062     BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1063   } else if(!BindMount(mirrorAppDataPath, actualAppDataPath)) {
1064     ALOGW("Failed to mount %s to %s: %s",
1065           mirrorAppDataPath, actualAppDataPath, strerror(errno));
1066     return false;
1067   }
1068   return true;
1069 }
1070 
1071 // There is an app data directory over tmpfs overlaid CE / DE storage
1072 // bind mount it from the actual app data directory in data mirror.
mountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn)1073 static void mountAppData(std::string_view package_name,
1074     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1075     std::string_view actual_data_path, fail_fn_t fail_fn) {
1076 
1077   char mirrorAppDataPath[PATH_MAX];
1078   char actualAppDataPath[PATH_MAX];
1079   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1080       mirror_pkg_dir_name.data());
1081   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1082 
1083   // Bind mount from original app data directory in mirror.
1084   BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1085 }
1086 
1087 // Get the directory name stored in /data/data. If device is unlocked it should be the same as
1088 // package name, otherwise it will be an encrypted name but with same inode number.
getAppDataDirName(std::string_view parent_path,std::string_view package_name,long long ce_data_inode,fail_fn_t fail_fn)1089 static std::string getAppDataDirName(std::string_view parent_path, std::string_view package_name,
1090       long long ce_data_inode, fail_fn_t fail_fn) {
1091   // Check if directory exists
1092   char tmpPath[PATH_MAX];
1093   snprintf(tmpPath, PATH_MAX, "%s/%s", parent_path.data(), package_name.data());
1094   struct stat s;
1095   int err = stat(tmpPath, &s);
1096   if (err == 0) {
1097     // Directory exists, so return the directory name
1098     return package_name.data();
1099   } else {
1100     if (errno != ENOENT) {
1101       fail_fn(CREATE_ERROR("Unexpected error in getAppDataDirName: %s", strerror(errno)));
1102       return nullptr;
1103     }
1104     {
1105       // Directory doesn't exist, try to search the name from inode
1106       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1107       if (dir == nullptr) {
1108         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1109       }
1110       struct dirent* ent;
1111       while ((ent = readdir(dir.get()))) {
1112         if (static_cast<long long>(ent->d_ino) == ce_data_inode) {
1113             return ent->d_name;
1114         }
1115       }
1116     }
1117 
1118     // Fallback due to b/145989852, ce_data_inode stored in package manager may be corrupted
1119     // if ino_t is 32 bits.
1120     ino_t fixed_ce_data_inode = 0;
1121     if ((ce_data_inode & UPPER_HALF_WORD_MASK) == UPPER_HALF_WORD_MASK) {
1122       fixed_ce_data_inode = ce_data_inode & LOWER_HALF_WORD_MASK;
1123     } else if ((ce_data_inode & LOWER_HALF_WORD_MASK) == LOWER_HALF_WORD_MASK) {
1124       fixed_ce_data_inode = ((ce_data_inode >> 32) & LOWER_HALF_WORD_MASK);
1125     }
1126     if (fixed_ce_data_inode != 0) {
1127       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1128       if (dir == nullptr) {
1129         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1130       }
1131       struct dirent* ent;
1132       while ((ent = readdir(dir.get()))) {
1133         if (ent->d_ino == fixed_ce_data_inode) {
1134           long long d_ino = ent->d_ino;
1135           ALOGW("Fallback success inode %lld -> %lld", ce_data_inode, d_ino);
1136           return ent->d_name;
1137         }
1138       }
1139     }
1140     // Fallback done
1141     ALOGW("Unable to find %s:%lld in %s", package_name.data(), ce_data_inode, parent_path.data());
1142     return "";
1143   }
1144 }
1145 
1146 // Isolate app's data directory, by mounting a tmpfs on CE DE storage,
1147 // and create and bind mount app data in related_packages.
isolateAppDataPerPackage(int userId,std::string_view package_name,std::string_view volume_uuid,long long ce_data_inode,std::string_view actualCePath,std::string_view actualDePath,fail_fn_t fail_fn)1148 static void isolateAppDataPerPackage(int userId, std::string_view package_name,
1149     std::string_view volume_uuid, long long ce_data_inode, std::string_view actualCePath,
1150     std::string_view actualDePath, fail_fn_t fail_fn) {
1151 
1152   char mirrorCePath[PATH_MAX];
1153   char mirrorDePath[PATH_MAX];
1154   char mirrorCeParent[PATH_MAX];
1155   snprintf(mirrorCeParent, PATH_MAX, "/data_mirror/data_ce/%s", volume_uuid.data());
1156   snprintf(mirrorCePath, PATH_MAX, "%s/%d", mirrorCeParent, userId);
1157   snprintf(mirrorDePath, PATH_MAX, "/data_mirror/data_de/%s/%d", volume_uuid.data(), userId);
1158 
1159   createAndMountAppData(package_name, package_name, mirrorDePath, actualDePath, fail_fn,
1160                         true /*call_fail_fn*/);
1161 
1162   std::string ce_data_path = getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1163   if (ce_data_path.empty()) {
1164     ALOGE("Ignoring missing CE app data dir for %s\n", package_name.data());
1165     return;
1166   }
1167   if (!createAndMountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn,
1168                              false /*call_fail_fn*/)) {
1169     // CE might unlocks and the name is decrypted
1170     // get the name and mount again
1171     ce_data_path=getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1172     if (ce_data_path.empty()) {
1173       ALOGE("Ignoring missing CE app data dir for %s\n", package_name.data());
1174       return;
1175     }
1176     mountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn);
1177   }
1178 }
1179 
1180 // Relabel directory
relabelDir(const char * path,const char * context,fail_fn_t fail_fn)1181 static void relabelDir(const char* path, const char* context, fail_fn_t fail_fn) {
1182   if (setfilecon(path, context) != 0) {
1183     fail_fn(CREATE_ERROR("Failed to setfilecon %s %s", path, strerror(errno)));
1184   }
1185 }
1186 
1187 // Relabel the subdirectories and symlinks in the given directory, non-recursively.
relabelSubdirs(const char * path,const char * context,fail_fn_t fail_fn)1188 static void relabelSubdirs(const char* path, const char* context, fail_fn_t fail_fn) {
1189   DIR* dir = opendir(path);
1190   if (dir == nullptr) {
1191     fail_fn(CREATE_ERROR("Failed to opendir %s", path));
1192   }
1193   struct dirent* ent;
1194   while ((ent = readdir(dir))) {
1195     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1196     auto filePath = StringPrintf("%s/%s", path, ent->d_name);
1197     if (ent->d_type == DT_DIR) {
1198       relabelDir(filePath.c_str(), context, fail_fn);
1199     } else if (ent->d_type == DT_LNK) {
1200       if (lsetfilecon(filePath.c_str(), context) != 0) {
1201         fail_fn(CREATE_ERROR("Failed to lsetfilecon %s %s", filePath.c_str(), strerror(errno)));
1202       }
1203     } else {
1204       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, filePath.c_str()));
1205     }
1206   }
1207   closedir(dir);
1208 }
1209 
1210 /**
1211  * Hide the CE and DE data directories of non-related apps.
1212  *
1213  * Without this, apps can detect if any app is installed by trying to "touch" the app's CE
1214  * or DE data directory, e.g. /data/data/com.whatsapp.  This fails with EACCES if the app
1215  * is installed, or ENOENT if it's not.  Traditional file permissions or SELinux can only
1216  * block accessing those directories but can't fix fingerprinting like this.
1217  *
1218  * Instead, we hide non-related apps' data directories from the filesystem entirely by
1219  * mounting tmpfs instances over their parent directories and bind-mounting in just the
1220  * needed app data directories.  This is done in a private mount namespace.
1221  *
1222  * Steps:
1223  * (1) Collect a list of all related apps (apps with same uid and allowlisted apps) data info
1224  *     (package name, data stored volume uuid, and inode number of its CE data directory)
1225  * (2) Mount tmpfs on /data/data and /data/user{,_de}, and on /mnt/expand/$volume/user{,_de}
1226  *     for all adoptable storage volumes.  This hides all app data directories.
1227  * (3) For each related app, create stubs for its data directories in the relevant tmpfs
1228  *     instances, then bind mount in the actual directories from /data_mirror.  This works
1229  *     for both the CE and DE directories.  DE storage is always unlocked, whereas the
1230  *     app's CE directory can be found via inode number if CE storage is locked.
1231  *
1232  * Example assuming user 0, app "com.android.foo", no shared uid, and no adoptable storage:
1233  * (1) Info = ["com.android.foo", "null" (volume uuid "null"=default), "123456" (inode number)]
1234  * (2) Mount tmpfs on /data/data, /data/user, and /data/user_de.
1235  * (3) For DE storage, create a directory /data/user_de/0/com.android.foo and bind mount
1236  *     /data_mirror/data_de/0/com.android.foo onto it.
1237  * (4) Do similar for CE storage.  But if the device is in direct boot mode, then CE
1238  *     storage will be locked, so the app's CE data directory won't exist at the usual
1239  *     path /data_mirror/data_ce/0/com.android.foo.  It will still exist in
1240  *     /data_mirror/data_ce/0, but its filename will be an unpredictable no-key name.  In
1241  *     this case, we use the inode number to find the right directory instead.  Note that
1242  *     the bind-mounted app CE data directory will remain locked.  It will be unlocked
1243  *     automatically if/when the user's CE storage is unlocked, since adding an encryption
1244  *     key takes effect on a whole filesystem instance including all its mounts.
1245  */
isolateAppData(JNIEnv * env,const std::vector<std::string> & merged_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1246 static void isolateAppData(JNIEnv* env, const std::vector<std::string>& merged_data_info_list,
1247     uid_t uid, const char* process_name,
1248     jstring managed_nice_name, fail_fn_t fail_fn) {
1249 
1250   const userid_t userId = multiuser_get_user_id(uid);
1251 
1252   int size = merged_data_info_list.size();
1253 
1254   // Mount tmpfs on all possible data directories, so app no longer see the original apps data.
1255   char internalCePath[PATH_MAX];
1256   char internalLegacyCePath[PATH_MAX];
1257   char internalDePath[PATH_MAX];
1258   char externalPrivateMountPath[PATH_MAX];
1259 
1260   snprintf(internalCePath, PATH_MAX, "/data/user");
1261   snprintf(internalLegacyCePath, PATH_MAX, "/data/data");
1262   snprintf(internalDePath, PATH_MAX, "/data/user_de");
1263   snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1264 
1265   // Get the "u:object_r:system_userdir_file:s0" security context.  This can be
1266   // gotten from several different places; we use /data/user.
1267   char* dataUserdirContext = nullptr;
1268   if (getfilecon(internalCePath, &dataUserdirContext) < 0) {
1269     fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalCePath,
1270         strerror(errno)));
1271   }
1272   // Get the "u:object_r:system_data_file:s0" security context.  This can be
1273   // gotten from several different places; we use /data/misc.
1274   char* dataFileContext = nullptr;
1275   if (getfilecon("/data/misc", &dataFileContext) < 0) {
1276     fail_fn(CREATE_ERROR("Unable to getfilecon on /data/misc %s", strerror(errno)));
1277   }
1278 
1279   MountAppDataTmpFs(internalLegacyCePath, fail_fn);
1280   MountAppDataTmpFs(internalCePath, fail_fn);
1281   MountAppDataTmpFs(internalDePath, fail_fn);
1282 
1283   // Mount tmpfs on all external vols DE and CE storage
1284   DIR* dir = opendir(externalPrivateMountPath);
1285   if (dir == nullptr) {
1286     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1287   }
1288   struct dirent* ent;
1289   while ((ent = readdir(dir))) {
1290     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1291     if (ent->d_type != DT_DIR) {
1292       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1293     }
1294     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1295     auto cePath = StringPrintf("%s/user", volPath.c_str());
1296     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1297     // Wait until dir user is created.
1298     WaitUntilDirReady(cePath.c_str(), fail_fn);
1299     MountAppDataTmpFs(cePath.c_str(), fail_fn);
1300     // Wait until dir user_de is created.
1301     WaitUntilDirReady(dePath.c_str(), fail_fn);
1302     MountAppDataTmpFs(dePath.c_str(), fail_fn);
1303   }
1304   closedir(dir);
1305 
1306   // No bind mounting of app data should occur in the case of a sandbox process since SDK sandboxes
1307   // should not be able to read app data. Tmpfs was mounted however since a sandbox should not have
1308   // access to app data.
1309   appid_t appId = multiuser_get_app_id(uid);
1310   bool isSdkSandboxProcess =
1311           (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END);
1312   if (!isSdkSandboxProcess) {
1313       // Prepare default dirs for user 0 as user 0 always exists.
1314       int result = symlink("/data/data", "/data/user/0");
1315       if (result != 0) {
1316           fail_fn(CREATE_ERROR("Failed to create symlink /data/user/0 %s", strerror(errno)));
1317       }
1318       PrepareDirIfNotPresent("/data/user_de/0", DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1319                              fail_fn);
1320 
1321       for (int i = 0; i < size; i += 3) {
1322           std::string const& packageName = merged_data_info_list[i];
1323           std::string const& volUuid = merged_data_info_list[i + 1];
1324           std::string const& inode = merged_data_info_list[i + 2];
1325 
1326           std::string::size_type sz;
1327           long long ceDataInode = std::stoll(inode, &sz);
1328 
1329           std::string actualCePath, actualDePath;
1330           if (volUuid.compare("null") != 0) {
1331               // Volume that is stored in /mnt/expand
1332               char volPath[PATH_MAX];
1333               char volCePath[PATH_MAX];
1334               char volDePath[PATH_MAX];
1335               char volCeUserPath[PATH_MAX];
1336               char volDeUserPath[PATH_MAX];
1337 
1338               snprintf(volPath, PATH_MAX, "/mnt/expand/%s", volUuid.c_str());
1339               snprintf(volCePath, PATH_MAX, "%s/user", volPath);
1340               snprintf(volDePath, PATH_MAX, "%s/user_de", volPath);
1341               snprintf(volCeUserPath, PATH_MAX, "%s/%d", volCePath, userId);
1342               snprintf(volDeUserPath, PATH_MAX, "%s/%d", volDePath, userId);
1343 
1344               PrepareDirIfNotPresent(volPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1345                                      fail_fn);
1346               PrepareDirIfNotPresent(volCePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1347                                      fail_fn);
1348               PrepareDirIfNotPresent(volDePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1349                                      fail_fn);
1350               PrepareDirIfNotPresent(volCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1351                                      fail_fn);
1352               PrepareDirIfNotPresent(volDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1353                                      fail_fn);
1354 
1355               actualCePath = volCeUserPath;
1356               actualDePath = volDeUserPath;
1357           } else {
1358               // Internal volume that stored in /data
1359               char internalCeUserPath[PATH_MAX];
1360               char internalDeUserPath[PATH_MAX];
1361               snprintf(internalCeUserPath, PATH_MAX, "/data/user/%d", userId);
1362               snprintf(internalDeUserPath, PATH_MAX, "/data/user_de/%d", userId);
1363               // If it's not user 0, create /data/user/$USER.
1364               if (userId == 0) {
1365                   actualCePath = internalLegacyCePath;
1366               } else {
1367                   PrepareDirIfNotPresent(internalCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1368                                          AID_ROOT, fail_fn);
1369                   actualCePath = internalCeUserPath;
1370               }
1371               PrepareDirIfNotPresent(internalDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1372                                      AID_ROOT, fail_fn);
1373               actualDePath = internalDeUserPath;
1374           }
1375           isolateAppDataPerPackage(userId, packageName, volUuid, ceDataInode, actualCePath,
1376                                    actualDePath, fail_fn);
1377       }
1378   }
1379 
1380   // We set the label AFTER everything is done, as we are applying
1381   // the file operations on tmpfs. If we set the label when we mount
1382   // tmpfs, SELinux will not happy as we are changing system_data_files.
1383   // Relabel dir under /data/user, including /data/user/0
1384   relabelSubdirs(internalCePath, dataFileContext, fail_fn);
1385 
1386   // Relabel /data/user
1387   relabelDir(internalCePath, dataUserdirContext, fail_fn);
1388 
1389   // Relabel /data/data
1390   relabelDir(internalLegacyCePath, dataFileContext, fail_fn);
1391 
1392   // Relabel subdirectories of /data/user_de
1393   relabelSubdirs(internalDePath, dataFileContext, fail_fn);
1394 
1395   // Relabel /data/user_de
1396   relabelDir(internalDePath, dataUserdirContext, fail_fn);
1397 
1398   // Relabel CE and DE dirs under /mnt/expand
1399   dir = opendir(externalPrivateMountPath);
1400   if (dir == nullptr) {
1401     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1402   }
1403   while ((ent = readdir(dir))) {
1404     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1405     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1406     auto cePath = StringPrintf("%s/user", volPath.c_str());
1407     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1408 
1409     relabelSubdirs(cePath.c_str(), dataFileContext, fail_fn);
1410     relabelDir(cePath.c_str(), dataUserdirContext, fail_fn);
1411     relabelSubdirs(dePath.c_str(), dataFileContext, fail_fn);
1412     relabelDir(dePath.c_str(), dataUserdirContext, fail_fn);
1413   }
1414   closedir(dir);
1415 
1416   freecon(dataUserdirContext);
1417   freecon(dataFileContext);
1418 }
1419 
1420 /**
1421  * Without sdk sandbox data isolation, the sandbox could detect if another app is installed on the
1422  * system by "touching" other data directories like /data/misc_ce/0/sdksandbox/com.whatsapp, similar
1423  * to apps without app data isolation (see {@link #isolateAppData()}).
1424  *
1425  * To prevent this, tmpfs is mounted onto misc_ce and misc_de directories on all possible volumes in
1426  * a separate mount namespace. The sandbox directory path is then created containing the name of the
1427  * client app package associated with the sdk sandbox. The contents for this (sdk level storage and
1428  * shared sdk storage) are bind mounted from the sandbox data mirror.
1429  */
isolateSdkSandboxData(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1430 static void isolateSdkSandboxData(JNIEnv* env, jobjectArray pkg_data_info_list, uid_t uid,
1431                                   const char* process_name, jstring managed_nice_name,
1432                                   fail_fn_t fail_fn) {
1433     const userid_t userId = multiuser_get_user_id(uid);
1434 
1435     int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1436     // The sandbox should only have information of one associated client app (package, uuid, inode)
1437     if (size != 3) {
1438         fail_fn(CREATE_ERROR(
1439                 "Unable to isolate sandbox data, incorrect associated app information"));
1440     }
1441 
1442     auto extract_fn = [env, process_name, managed_nice_name,
1443                        pkg_data_info_list](int info_list_idx) {
1444         jstring jstr = (jstring)(env->GetObjectArrayElement(pkg_data_info_list, info_list_idx));
1445         return ExtractJString(env, process_name, managed_nice_name, jstr).value();
1446     };
1447     std::string packageName = extract_fn(0);
1448     std::string volUuid = extract_fn(1);
1449 
1450     char internalCePath[PATH_MAX];
1451     char internalDePath[PATH_MAX];
1452     char externalPrivateMountPath[PATH_MAX];
1453     snprintf(internalCePath, PATH_MAX, "/data/misc_ce");
1454     snprintf(internalDePath, PATH_MAX, "/data/misc_de");
1455     snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1456 
1457     char ceUserPath[PATH_MAX];
1458     char deUserPath[PATH_MAX];
1459     if (volUuid != "null") {
1460         snprintf(ceUserPath, PATH_MAX, "%s/%s/misc_ce/%d", externalPrivateMountPath,
1461                  volUuid.c_str(), userId);
1462         snprintf(deUserPath, PATH_MAX, "%s/%s/misc_de/%d", externalPrivateMountPath,
1463                  volUuid.c_str(), userId);
1464     } else {
1465         snprintf(ceUserPath, PATH_MAX, "%s/%d", internalCePath, userId);
1466         snprintf(deUserPath, PATH_MAX, "%s/%d", internalDePath, userId);
1467     }
1468 
1469     char ceSandboxPath[PATH_MAX];
1470     char deSandboxPath[PATH_MAX];
1471     snprintf(ceSandboxPath, PATH_MAX, "%s/sdksandbox", ceUserPath);
1472     snprintf(deSandboxPath, PATH_MAX, "%s/sdksandbox", deUserPath);
1473 
1474     // If the client app using the sandbox has been installed when the device is locked and the
1475     // sandbox starts up when the device is locked, sandbox storage might not have been created.
1476     // In that case, mount tmpfs for data isolation, but don't bind mount.
1477     bool bindMountCeSandboxDataDirs = true;
1478     bool bindMountDeSandboxDataDirs = true;
1479     if (access(ceSandboxPath, F_OK) != 0) {
1480         bindMountCeSandboxDataDirs = false;
1481     }
1482     if (access(deSandboxPath, F_OK) != 0) {
1483         bindMountDeSandboxDataDirs = false;
1484     }
1485 
1486     char* context = nullptr;
1487     char* userContext = nullptr;
1488     char* sandboxContext = nullptr;
1489     if (getfilecon(internalDePath, &context) < 0) {
1490         fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalDePath, strerror(errno)));
1491     }
1492     if (bindMountDeSandboxDataDirs) {
1493         if (getfilecon(deUserPath, &userContext) < 0) {
1494             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deUserPath, strerror(errno)));
1495         }
1496         if (getfilecon(deSandboxPath, &sandboxContext) < 0) {
1497             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deSandboxPath, strerror(errno)));
1498         }
1499     }
1500 
1501     MountAppDataTmpFs(internalCePath, fail_fn);
1502     MountAppDataTmpFs(internalDePath, fail_fn);
1503 
1504     // Mount tmpfs on all external volumes
1505     DIR* dir = opendir(externalPrivateMountPath);
1506     if (dir == nullptr) {
1507         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1508     }
1509     struct dirent* ent;
1510     while ((ent = readdir(dir))) {
1511         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1512         if (ent->d_type != DT_DIR) {
1513             fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1514         }
1515         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1516         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1517         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1518 
1519         WaitUntilDirReady(externalCePath.c_str(), fail_fn);
1520         MountAppDataTmpFs(externalCePath.c_str(), fail_fn);
1521         WaitUntilDirReady(externalDePath.c_str(), fail_fn);
1522         MountAppDataTmpFs(externalDePath.c_str(), fail_fn);
1523     }
1524     closedir(dir);
1525 
1526     char mirrorCeSandboxPath[PATH_MAX];
1527     char mirrorDeSandboxPath[PATH_MAX];
1528     snprintf(mirrorCeSandboxPath, PATH_MAX, "/data_mirror/misc_ce/%s/%d/sdksandbox",
1529              volUuid.c_str(), userId);
1530     snprintf(mirrorDeSandboxPath, PATH_MAX, "/data_mirror/misc_de/%s/%d/sdksandbox",
1531              volUuid.c_str(), userId);
1532 
1533     if (bindMountCeSandboxDataDirs) {
1534         PrepareDir(ceUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1535         PrepareDir(ceSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1536         // TODO(b/231322885): Use inode numbers to find the correct app path when the device locked.
1537         createAndMountAppData(packageName, packageName, mirrorCeSandboxPath, ceSandboxPath, fail_fn,
1538                               true /*call_fail_fn*/);
1539 
1540         relabelDir(ceSandboxPath, sandboxContext, fail_fn);
1541         relabelDir(ceUserPath, userContext, fail_fn);
1542     }
1543     if (bindMountDeSandboxDataDirs) {
1544         PrepareDir(deUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1545         PrepareDir(deSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1546         createAndMountAppData(packageName, packageName, mirrorDeSandboxPath, deSandboxPath, fail_fn,
1547                               true /*call_fail_fn*/);
1548 
1549         relabelDir(deSandboxPath, sandboxContext, fail_fn);
1550         relabelDir(deUserPath, userContext, fail_fn);
1551     }
1552 
1553     // We set the label AFTER everything is done, as we are applying
1554     // the file operations on tmpfs. If we set the label when we mount
1555     // tmpfs, SELinux will not happy as we are changing system_data_files.
1556     relabelDir(internalCePath, context, fail_fn);
1557     relabelDir(internalDePath, context, fail_fn);
1558 
1559     // Relabel CE and DE dirs under /mnt/expand
1560     dir = opendir(externalPrivateMountPath);
1561     if (dir == nullptr) {
1562         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1563     }
1564     while ((ent = readdir(dir))) {
1565         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1566         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1567         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1568         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1569         relabelDir(externalCePath.c_str(), context, fail_fn);
1570         relabelDir(externalDePath.c_str(), context, fail_fn);
1571     }
1572     closedir(dir);
1573 
1574     if (bindMountDeSandboxDataDirs) {
1575         freecon(sandboxContext);
1576         freecon(userContext);
1577     }
1578     freecon(context);
1579 }
1580 
insertPackagesToMergedList(JNIEnv * env,std::vector<std::string> & merged_data_info_list,jobjectArray data_info_list,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1581 static void insertPackagesToMergedList(JNIEnv* env,
1582   std::vector<std::string>& merged_data_info_list,
1583   jobjectArray data_info_list, const char* process_name,
1584   jstring managed_nice_name, fail_fn_t fail_fn) {
1585 
1586   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1587 
1588   int size = (data_info_list != nullptr) ? env->GetArrayLength(data_info_list) : 0;
1589   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1590   if ((size % 3) != 0) {
1591     fail_fn(CREATE_ERROR("Wrong data_info_list size %d", size));
1592   }
1593 
1594   for (int i = 0; i < size; i += 3) {
1595     jstring package_str = (jstring) (env->GetObjectArrayElement(data_info_list, i));
1596     std::string packageName = extract_fn(package_str).value();
1597     merged_data_info_list.push_back(packageName);
1598 
1599     jstring vol_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 1));
1600     std::string volUuid = extract_fn(vol_str).value();
1601     merged_data_info_list.push_back(volUuid);
1602 
1603     jstring inode_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 2));
1604     std::string inode = extract_fn(inode_str).value();
1605     merged_data_info_list.push_back(inode);
1606   }
1607 }
1608 
isolateAppData(JNIEnv * env,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1609 static void isolateAppData(JNIEnv* env, jobjectArray pkg_data_info_list,
1610                            jobjectArray allowlisted_data_info_list, uid_t uid,
1611                            const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1612     std::vector<std::string> merged_data_info_list;
1613     insertPackagesToMergedList(env, merged_data_info_list, pkg_data_info_list, process_name,
1614                                managed_nice_name, fail_fn);
1615     insertPackagesToMergedList(env, merged_data_info_list, allowlisted_data_info_list, process_name,
1616                                managed_nice_name, fail_fn);
1617 
1618     isolateAppData(env, merged_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1619 }
1620 
1621 /**
1622  * Like isolateAppData(), isolate jit profile directories, so apps don't see what
1623  * other apps are installed by reading content inside /data/misc/profiles/cur.
1624  *
1625  * The implementation is similar to isolateAppData(), it creates a tmpfs
1626  * on /data/misc/profiles/cur, and bind mounts related package profiles to it.
1627  */
isolateJitProfile(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1628 static void isolateJitProfile(JNIEnv* env, jobjectArray pkg_data_info_list,
1629     uid_t uid, const char* process_name, jstring managed_nice_name,
1630     fail_fn_t fail_fn) {
1631 
1632   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1633   const userid_t user_id = multiuser_get_user_id(uid);
1634 
1635   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1636   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1637   if ((size % 3) != 0) {
1638     fail_fn(CREATE_ERROR("Wrong pkg_inode_list size %d", size));
1639   }
1640 
1641   // Mount (namespace) tmpfs on profile directory, so apps no longer access
1642   // the original profile directory anymore.
1643   MountAppDataTmpFs(kCurProfileDirPath, fail_fn);
1644   MountAppDataTmpFs(kRefProfileDirPath, fail_fn);
1645 
1646   // Sandbox processes do not have JIT profile, so no data needs to be bind mounted. However, it
1647   // should still not have access to JIT profile, so tmpfs is mounted.
1648   appid_t appId = multiuser_get_app_id(uid);
1649   if (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END) {
1650       return;
1651   }
1652 
1653   // Create profile directory for this user.
1654   std::string actualCurUserProfile = StringPrintf("%s/%d", kCurProfileDirPath, user_id);
1655   PrepareDir(actualCurUserProfile, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1656 
1657   for (int i = 0; i < size; i += 3) {
1658     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1659     std::string packageName = extract_fn(package_str).value();
1660 
1661     std::string actualCurPackageProfile = StringPrintf("%s/%s", actualCurUserProfile.c_str(),
1662         packageName.c_str());
1663     std::string mirrorCurPackageProfile = StringPrintf("/data_mirror/cur_profiles/%d/%s",
1664         user_id, packageName.c_str());
1665     std::string actualRefPackageProfile = StringPrintf("%s/%s", kRefProfileDirPath,
1666         packageName.c_str());
1667     std::string mirrorRefPackageProfile = StringPrintf("/data_mirror/ref_profiles/%s",
1668         packageName.c_str());
1669 
1670     if (access(mirrorCurPackageProfile.c_str(), F_OK) != 0) {
1671       ALOGW("Can't access app profile directory: %s", mirrorCurPackageProfile.c_str());
1672       continue;
1673     }
1674     if (access(mirrorRefPackageProfile.c_str(), F_OK) != 0) {
1675       ALOGW("Can't access app profile directory: %s", mirrorRefPackageProfile.c_str());
1676       continue;
1677     }
1678 
1679     PrepareDir(actualCurPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1680     BindMount(mirrorCurPackageProfile, actualCurPackageProfile, fail_fn);
1681     PrepareDir(actualRefPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1682     BindMount(mirrorRefPackageProfile, actualRefPackageProfile, fail_fn);
1683   }
1684 }
1685 
WaitUntilDirReady(const std::string & target,fail_fn_t fail_fn)1686 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn) {
1687   unsigned int sleepIntervalUs = STORAGE_DIR_CHECK_INIT_INTERVAL_US;
1688 
1689   // This is just an approximate value as it doesn't need to be very accurate.
1690   unsigned int sleepTotalUs = 0;
1691 
1692   const char* dir_path = target.c_str();
1693   while (sleepTotalUs < STORAGE_DIR_CHECK_TIMEOUT_US) {
1694     if (access(dir_path, F_OK) == 0) {
1695       return;
1696     }
1697     // Failed, so we add exponential backoff and retry
1698     usleep(sleepIntervalUs);
1699     sleepTotalUs += sleepIntervalUs;
1700     sleepIntervalUs = std::min<unsigned int>(
1701         sleepIntervalUs * STORAGE_DIR_CHECK_RETRY_MULTIPLIER,
1702         STORAGE_DIR_CHECK_MAX_INTERVAL_US);
1703   }
1704   // Last chance and get the latest errno if it fails.
1705   if (access(dir_path, F_OK) == 0) {
1706     return;
1707   }
1708   fail_fn(CREATE_ERROR("Error dir is not ready %s: %s", dir_path, strerror(errno)));
1709 }
1710 
// Maps each public String constant of android.os.Build (first) to the system property it is
// read from (second).
std::pair<const char*, const char*> build_constants[] = {
        {"ID", "ro.build.id"},
        {"DISPLAY", "ro.build.display.id"},
        {"PRODUCT", "ro.product.name"},
        {"DEVICE", "ro.product.device"},
        {"BOARD", "ro.product.board"},
        {"MANUFACTURER", "ro.product.manufacturer"},
        {"BRAND", "ro.product.brand"},
        {"MODEL", "ro.product.model"},
        {"BOOTLOADER", "ro.bootloader"},
        {"HARDWARE", "ro.hardware"},
        {"SKU", "ro.boot.hardware.sku"},
        {"ODM_SKU", "ro.boot.product.hardware.sku"},
        {"TAGS", "ro.build.tags"},
        {"TYPE", "ro.build.type"},
        {"USER", "ro.build.user"},
        {"HOST", "ro.build.host"},
};
1730 
// Maps each public String constant of Build.VERSION (first) to the system property it is read
// from (second).
std::pair<const char*, const char*> build_version_constants[] = {
        {"INCREMENTAL", "ro.build.version.incremental"},
        {"RELEASE", "ro.build.version.release"},
        {"RELEASE_OR_CODENAME", "ro.build.version.release_or_codename"},
        {"RELEASE_OR_PREVIEW_DISPLAY", "ro.build.version.release_or_preview_display"},
        {"BASE_OS", "ro.build.version.base_os"},
        {"SECURITY_PATCH", "ro.build.version.security_patch"},
        {"SDK", "ro.build.version.sdk"},
        {"PREVIEW_SDK_FINGERPRINT", "ro.build.version.preview_sdk_fingerprint"},
        {"CODENAME", "ro.build.version.codename"},
};
1743 
ReloadBuildJavaConstant(JNIEnv * env,jclass build_class,const char * field_name,const char * field_signature,const char * sysprop_name)1744 static void ReloadBuildJavaConstant(JNIEnv* env, jclass build_class, const char* field_name,
1745                                     const char* field_signature, const char* sysprop_name) {
1746   const prop_info* prop_info = __system_property_find(sysprop_name);
1747   std::string new_value;
1748   __system_property_read_callback(
1749           prop_info,
1750           [](void* cookie, const char* name, const char* value, unsigned serial) {
1751               auto new_value = reinterpret_cast<std::string*>(cookie);
1752               *new_value = value;
1753           },
1754           &new_value);
1755   jfieldID fieldId = env->GetStaticFieldID(build_class, field_name, field_signature);
1756   if (strcmp(field_signature, "I") == 0) {
1757     env->SetStaticIntField(build_class, fieldId, jint(strtol(new_value.c_str(), nullptr, 0)));
1758   } else if (strcmp(field_signature, "Ljava/lang/String;") == 0) {
1759     jstring string_val = env->NewStringUTF(new_value.c_str());
1760     env->SetStaticObjectField(build_class, fieldId, string_val);
1761   } else if (strcmp(field_signature, "[Ljava/lang/String;") == 0) {
1762     auto stream = std::stringstream(new_value);
1763     std::vector<std::string> items;
1764     std::string segment;
1765     while (std::getline(stream, segment, ',')) {
1766       items.push_back(segment);
1767     }
1768     jclass string_class = env->FindClass("java/lang/String");
1769     jobjectArray string_arr = env->NewObjectArray(items.size(), string_class, nullptr);
1770     for (size_t i = 0; i < items.size(); i++) {
1771       jstring string_arr_val = env->NewStringUTF(items.at(i).c_str());
1772       env->SetObjectArrayElement(string_arr, i, string_arr_val);
1773     }
1774     env->SetStaticObjectField(build_class, fieldId, string_arr);
1775   } else if (strcmp(field_signature, "J") == 0) {
1776     env->SetStaticLongField(build_class, fieldId, jlong(strtoll(new_value.c_str(), nullptr, 0)));
1777   }
1778 }
1779 
ReloadBuildJavaConstants(JNIEnv * env)1780 static void ReloadBuildJavaConstants(JNIEnv* env) {
1781   jclass build_cls = env->FindClass("android/os/Build");
1782   size_t arr_size = sizeof(build_constants) / sizeof(build_constants[0]);
1783   for (size_t i = 0; i < arr_size; i++) {
1784     const char* field_name = build_constants[i].first;
1785     const char* sysprop_name = build_constants[i].second;
1786     ReloadBuildJavaConstant(env, build_cls, field_name, "Ljava/lang/String;", sysprop_name);
1787   }
1788   jclass build_version_cls = env->FindClass("android/os/Build$VERSION");
1789   arr_size = sizeof(build_version_constants) / sizeof(build_version_constants[0]);
1790   for (size_t i = 0; i < arr_size; i++) {
1791     const char* field_name = build_version_constants[i].first;
1792     const char* sysprop_name = build_version_constants[i].second;
1793     ReloadBuildJavaConstant(env, build_version_cls, field_name, "Ljava/lang/String;", sysprop_name);
1794   }
1795 
1796   // Reload the public String[] constants
1797   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_ABIS", "[Ljava/lang/String;",
1798                           "ro.product.cpu.abilist");
1799   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_32_BIT_ABIS", "[Ljava/lang/String;",
1800                           "ro.product.cpu.abilist32");
1801   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_64_BIT_ABIS", "[Ljava/lang/String;",
1802                           "ro.product.cpu.abilist64");
1803   ReloadBuildJavaConstant(env, build_version_cls, "ALL_CODENAMES", "[Ljava/lang/String;",
1804                           "ro.build.version.all_codenames");
1805 
1806   // Reload the public int/long constants
1807   ReloadBuildJavaConstant(env, build_cls, "TIME", "J", "ro.build.date.utc");
1808   ReloadBuildJavaConstant(env, build_version_cls, "SDK_INT", "I", "ro.build.version.sdk");
1809   ReloadBuildJavaConstant(env, build_version_cls, "PREVIEW_SDK_INT", "I",
1810                           "ro.build.version.preview_sdk");
1811 
1812   // Re-derive the fingerprint
1813   jmethodID derive_fingerprint =
1814           env->GetStaticMethodID(build_cls, "deriveFingerprint", "()Ljava/lang/String;");
1815   auto new_fingerprint = (jstring)(env->CallStaticObjectMethod(build_cls, derive_fingerprint));
1816   jfieldID fieldId = env->GetStaticFieldID(build_cls, "FINGERPRINT", "Ljava/lang/String;");
1817   env->SetStaticObjectField(build_cls, fieldId, new_fingerprint);
1818 }
1819 
BindMountSyspropOverride(fail_fn_t fail_fn,JNIEnv * env)1820 static void BindMountSyspropOverride(fail_fn_t fail_fn, JNIEnv* env) {
1821   std::string source = "/dev/__properties__/appcompat_override";
1822   std::string target = "/dev/__properties__";
1823   if (access(source.c_str(), F_OK) != 0) {
1824       return;
1825   }
1826   if (access(target.c_str(), F_OK) != 0) {
1827       return;
1828   }
1829   BindMount(source, target, fail_fn);
1830   // Reload the system properties file, to ensure new values are read into memory
1831   __system_properties_zygote_reload();
1832   // android.os.Build constants are pulled from system properties, so they must be reloaded, too
1833   ReloadBuildJavaConstants(env);
1834 }
1835 
MountInitOverride(fail_fn_t fail_fn,JNIEnv * env)1836 static void MountInitOverride(fail_fn_t fail_fn, JNIEnv* env) {
1837     const char* init_etc_dir = "/system/etc/init";
1838 
1839     if (TEMP_FAILURE_RETRY(mount("tmpfs", init_etc_dir, "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC,
1840                                  "uid=0,gid=0,mode=0751")) == -1) {
1841         fail_fn(CREATE_ERROR("Failed to mount tmpfs %s: %s", init_etc_dir, strerror(errno)));
1842     }
1843 }
1844 
BindMountStorageToLowerFs(const userid_t user_id,const uid_t uid,const char * dir_name,const char * package,fail_fn_t fail_fn)1845 static void BindMountStorageToLowerFs(const userid_t user_id, const uid_t uid,
1846     const char* dir_name, const char* package, fail_fn_t fail_fn) {
1847     bool hasSdcardFs = IsSdcardfsUsed();
1848     std::string source;
1849     if (hasSdcardFs) {
1850         source = StringPrintf("/mnt/runtime/default/emulated/%d/%s/%s", user_id, dir_name, package);
1851     } else {
1852         source = StringPrintf("/mnt/pass_through/%d/emulated/%d/%s/%s", user_id, user_id, dir_name,
1853                               package);
1854     }
1855 
1856   // Directory might be not ready, as prepareStorageDirs() is running asynchronously in ProcessList,
1857   // so wait until dir is created.
1858   WaitUntilDirReady(source, fail_fn);
1859   std::string target = StringPrintf("/storage/emulated/%d/%s/%s", user_id, dir_name, package);
1860 
1861   // As the parent is mounted as tmpfs, we need to create the target dir here.
1862   PrepareDirIfNotPresent(target, 0700, uid, uid, fail_fn);
1863 
1864   if (access(source.c_str(), F_OK) != 0) {
1865     fail_fn(CREATE_ERROR("Error accessing %s: %s", source.c_str(), strerror(errno)));
1866   }
1867   if (access(target.c_str(), F_OK) != 0) {
1868     fail_fn(CREATE_ERROR("Error accessing %s: %s", target.c_str(), strerror(errno)));
1869   }
1870   BindMount(source, target, fail_fn);
1871 }
1872 
1873 // Mount tmpfs on Android/data and Android/obb, then bind mount all app visible package
1874 // directories in data and obb directories.
BindMountStorageDirs(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1875 static void BindMountStorageDirs(JNIEnv* env, jobjectArray pkg_data_info_list,
1876     uid_t uid, const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1877 
1878   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1879   const userid_t user_id = multiuser_get_user_id(uid);
1880 
1881   // Fuse is ready, so we can start using fuse path.
1882   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1883 
1884   // Create tmpfs on Android/obb and Android/data so these 2 dirs won't enter fuse anymore.
1885   std::string androidObbDir = StringPrintf("/storage/emulated/%d/Android/obb", user_id);
1886   MountAppDataTmpFs(androidObbDir, fail_fn);
1887   std::string androidDataDir = StringPrintf("/storage/emulated/%d/Android/data", user_id);
1888   MountAppDataTmpFs(androidDataDir, fail_fn);
1889 
1890   // Bind mount each package obb directory
1891   for (int i = 0; i < size; i += 3) {
1892     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1893     std::string packageName = extract_fn(package_str).value();
1894     BindMountStorageToLowerFs(user_id, uid, "Android/obb", packageName.c_str(), fail_fn);
1895     BindMountStorageToLowerFs(user_id, uid, "Android/data", packageName.c_str(), fail_fn);
1896   }
1897 }
1898 
1899 // Utility routine to specialize a zygote child process.
SpecializeCommon(JNIEnv * env,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities,jlong bounding_capabilities,jint mount_external,jstring managed_se_info,jstring managed_nice_name,bool is_system_server,bool is_child_zygote,jstring managed_instruction_set,jstring managed_app_data_dir,bool is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,bool mount_data_dirs,bool mount_storage_dirs,bool mount_sysprop_overrides)1900 static void SpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray gids, jint runtime_flags,
1901                              jobjectArray rlimits, jlong permitted_capabilities,
1902                              jlong effective_capabilities, jlong bounding_capabilities,
1903                              jint mount_external, jstring managed_se_info,
1904                              jstring managed_nice_name, bool is_system_server, bool is_child_zygote,
1905                              jstring managed_instruction_set, jstring managed_app_data_dir,
1906                              bool is_top_app, jobjectArray pkg_data_info_list,
1907                              jobjectArray allowlisted_data_info_list, bool mount_data_dirs,
1908                              bool mount_storage_dirs, bool mount_sysprop_overrides) {
1909     const char* process_name = is_system_server ? "system_server" : "zygote";
1910     auto fail_fn = std::bind(ZygoteFailure, env, process_name, managed_nice_name, _1);
1911     auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1912 
1913     auto se_info = extract_fn(managed_se_info);
1914     auto nice_name = extract_fn(managed_nice_name);
1915     auto instruction_set = extract_fn(managed_instruction_set);
1916     auto app_data_dir = extract_fn(managed_app_data_dir);
1917 
1918     // Permit bounding capabilities
1919     permitted_capabilities |= bounding_capabilities;
1920 
1921     // Keep capabilities across UID change, unless we're staying root.
1922     if (uid != 0) {
1923         EnableKeepCapabilities(fail_fn);
1924     }
1925 
1926     SetInheritable(permitted_capabilities, fail_fn);
1927 
1928     DropCapabilitiesBoundingSet(fail_fn, bounding_capabilities);
1929 
1930     bool need_pre_initialize_native_bridge = !is_system_server && instruction_set.has_value() &&
1931             android::NativeBridgeAvailable() &&
1932             // Native bridge may be already initialized if this
1933             // is an app forked from app-zygote.
1934             !android::NativeBridgeInitialized() &&
1935             android::NeedsNativeBridge(instruction_set.value().c_str());
1936 
1937     MountEmulatedStorage(uid, mount_external, need_pre_initialize_native_bridge, fail_fn);
1938 
1939     // Make sure app is running in its own mount namespace before isolating its data directories.
1940     ensureInAppMountNamespace(fail_fn);
1941 
1942     // Isolate app data, jit profile and sandbox data directories by overlaying a tmpfs on those
1943     // dirs and bind mount all related packages separately.
1944     if (mount_data_dirs) {
1945         // Sdk sandbox data isolation does not need to occur for app processes since sepolicy
1946         // prevents access to sandbox data anyway.
1947         appid_t appId = multiuser_get_app_id(uid);
1948         if (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END) {
1949             isolateSdkSandboxData(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1950                                   fail_fn);
1951         }
1952         isolateAppData(env, pkg_data_info_list, allowlisted_data_info_list, uid, process_name,
1953                        managed_nice_name, fail_fn);
1954         isolateJitProfile(env, pkg_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1955     }
1956     // MOUNT_EXTERNAL_INSTALLER, MOUNT_EXTERNAL_PASS_THROUGH, MOUNT_EXTERNAL_ANDROID_WRITABLE apps
1957     // will have mount_storage_dirs == false here (set by ProcessList.needsStorageDataIsolation()),
1958     // and hence they won't bind mount storage dirs.
1959     if (mount_storage_dirs) {
1960         BindMountStorageDirs(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1961                              fail_fn);
1962     }
1963 
1964     if (mount_sysprop_overrides) {
1965         BindMountSyspropOverride(fail_fn, env);
1966         MountInitOverride(fail_fn, env);
1967     }
1968 
1969     // If this zygote isn't root, it won't be able to create a process group,
1970     // since the directory is owned by root.
1971     if (getuid() == 0) {
1972         const int rc = createProcessGroup(uid, getpid());
1973         if (rc != 0) {
1974             fail_fn(rc == -EROFS ? CREATE_ERROR("createProcessGroup failed, kernel missing "
1975                                                 "CONFIG_CGROUP_CPUACCT?")
1976                                  : CREATE_ERROR("createProcessGroup(%d, %d) failed: %s", uid,
1977                                                 /* pid= */ 0, strerror(-rc)));
1978         }
1979     }
1980 
1981     SetGids(env, gids, is_child_zygote, fail_fn);
1982     SetRLimits(env, rlimits, fail_fn);
1983 
1984     if (need_pre_initialize_native_bridge) {
1985         // Due to the logic behind need_pre_initialize_native_bridge we know that
1986         // instruction_set contains a value.
1987         android::PreInitializeNativeBridge(app_data_dir.has_value() ? app_data_dir.value().c_str()
1988                                                                     : nullptr,
1989                                            instruction_set.value().c_str());
1990     }
1991 
1992     if (is_system_server && !(runtime_flags & RuntimeFlags::PROFILE_SYSTEM_SERVER)) {
1993         // Prefetch the classloader for the system server. This is done early to
1994         // allow a tie-down of the proper system server selinux domain.
1995         // We don't prefetch when the system server is being profiled to avoid
1996         // loading AOT code.
1997         env->CallStaticObjectMethod(gZygoteInitClass, gGetOrCreateSystemServerClassLoader);
1998         if (env->ExceptionCheck()) {
1999             // Be robust here. The Java code will attempt to create the classloader
2000             // at a later point (but may not have rights to use AoT artifacts).
2001             env->ExceptionClear();
2002         }
2003         // Also prefetch standalone system server jars. The reason for doing this here is the same
2004         // as above.
2005         env->CallStaticVoidMethod(gZygoteInitClass, gPrefetchStandaloneSystemServerJars);
2006         if (env->ExceptionCheck()) {
2007             env->ExceptionClear();
2008         }
2009     }
2010 
2011     if (setresgid(gid, gid, gid) == -1) {
2012         fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
2013     }
2014 
2015     // Must be called when the new process still has CAP_SYS_ADMIN, in this case,
2016     // before changing uid from 0, which clears capabilities.  The other
2017     // alternative is to call prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that
2018     // breaks SELinux domain transition (see b/71859146).  As the result,
2019     // privileged syscalls used below still need to be accessible in app process.
2020     SetUpSeccompFilter(uid, is_child_zygote);
2021 
2022     // Must be called before losing the permission to set scheduler policy.
2023     SetSchedulerPolicy(fail_fn, is_top_app);
2024 
2025     if (setresuid(uid, uid, uid) == -1) {
2026         fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
2027     }
2028 
2029     // The "dumpable" flag of a process, which controls core dump generation, is
2030     // overwritten by the value in /proc/sys/fs/suid_dumpable when the effective
2031     // user or group ID changes. See proc(5) for possible values. In most cases,
2032     // the value is 0, so core dumps are disabled for zygote children. However,
2033     // when running in a Chrome OS container, the value is already set to 2,
2034     // which allows the external crash reporter to collect all core dumps. Since
2035     // only system crashes are interested, core dump is disabled for app
2036     // processes. This also ensures compliance with CTS.
2037     int dumpable = prctl(PR_GET_DUMPABLE);
2038     if (dumpable == -1) {
2039         ALOGE("prctl(PR_GET_DUMPABLE) failed: %s", strerror(errno));
2040         RuntimeAbort(env, __LINE__, "prctl(PR_GET_DUMPABLE) failed");
2041     }
2042 
2043     if (dumpable == 2 && uid >= AID_APP) {
2044         if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) == -1) {
2045             ALOGE("prctl(PR_SET_DUMPABLE, 0) failed: %s", strerror(errno));
2046             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 0) failed");
2047         }
2048     }
2049 
2050     // Set process properties to enable debugging if required.
2051     if ((runtime_flags & RuntimeFlags::DEBUG_ENABLE_PTRACE) != 0) {
2052         EnableDebugger();
2053         // Don't pass unknown flag to the ART runtime.
2054         runtime_flags &= ~RuntimeFlags::DEBUG_ENABLE_PTRACE;
2055     }
2056     if ((runtime_flags & RuntimeFlags::PROFILE_FROM_SHELL) != 0) {
2057         // simpleperf needs the process to be dumpable to profile it.
2058         if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
2059             ALOGE("prctl(PR_SET_DUMPABLE) failed: %s", strerror(errno));
2060             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 1) failed");
2061         }
2062     }
2063 
2064     HeapTaggingLevel heap_tagging_level;
2065     switch (runtime_flags & RuntimeFlags::MEMORY_TAG_LEVEL_MASK) {
2066         case RuntimeFlags::MEMORY_TAG_LEVEL_TBI:
2067             heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
2068             break;
2069         case RuntimeFlags::MEMORY_TAG_LEVEL_ASYNC:
2070             heap_tagging_level = M_HEAP_TAGGING_LEVEL_ASYNC;
2071             break;
2072         case RuntimeFlags::MEMORY_TAG_LEVEL_SYNC:
2073             heap_tagging_level = M_HEAP_TAGGING_LEVEL_SYNC;
2074             break;
2075         default:
2076             heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
2077             break;
2078     }
2079     mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL, heap_tagging_level);
2080 
2081     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2082     // runtime.
2083     runtime_flags &= ~RuntimeFlags::MEMORY_TAG_LEVEL_MASK;
2084 
2085     // Avoid heap zero initialization for applications without MTE. Zero init may
2086     // cause app compat problems, use more memory, or reduce performance. While it
2087     // would be nice to have them for apps, we will have to wait until they are
2088     // proven out, have more efficient hardware, and/or apply them only to new
2089     // applications.
2090     if (!(runtime_flags & RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED)) {
2091         mallopt(M_BIONIC_ZERO_INIT, 0);
2092     }
2093 
2094     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2095     // runtime.
2096     runtime_flags &= ~RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED;
2097 
2098     const char* nice_name_ptr = nice_name.has_value() ? nice_name.value().c_str() : nullptr;
2099     android_mallopt_gwp_asan_options_t gwp_asan_options;
2100     const char* kGwpAsanAppRecoverableSysprop =
2101             "persist.device_config.memory_safety_native.gwp_asan_recoverable_apps";
2102     // The system server doesn't have its nice name set by the time SpecializeCommon is called.
2103     gwp_asan_options.program_name = nice_name_ptr ?: process_name;
2104     switch (runtime_flags & RuntimeFlags::GWP_ASAN_LEVEL_MASK) {
2105         default:
2106         case RuntimeFlags::GWP_ASAN_LEVEL_DEFAULT:
2107             gwp_asan_options.mode = GetBoolProperty(kGwpAsanAppRecoverableSysprop, true)
2108                     ? Mode::APP_MANIFEST_DEFAULT
2109                     : Mode::APP_MANIFEST_NEVER;
2110             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2111             break;
2112         case RuntimeFlags::GWP_ASAN_LEVEL_NEVER:
2113             gwp_asan_options.mode = Mode::APP_MANIFEST_NEVER;
2114             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2115             break;
2116         case RuntimeFlags::GWP_ASAN_LEVEL_ALWAYS:
2117             gwp_asan_options.mode = Mode::APP_MANIFEST_ALWAYS;
2118             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2119             break;
2120         case RuntimeFlags::GWP_ASAN_LEVEL_LOTTERY:
2121             gwp_asan_options.mode = Mode::APP_MANIFEST_DEFAULT;
2122             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2123             break;
2124     }
2125     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2126     // runtime.
2127     runtime_flags &= ~RuntimeFlags::GWP_ASAN_LEVEL_MASK;
2128 
2129     SetCapabilities(permitted_capabilities, effective_capabilities, permitted_capabilities,
2130                     fail_fn);
2131 
2132     if ((runtime_flags & RuntimeFlags::ENABLE_PAGE_SIZE_APP_COMPAT) != 0) {
2133         android_set_16kb_appcompat_mode(true);
2134         // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2135         // runtime.
2136         runtime_flags &= ~RuntimeFlags::ENABLE_PAGE_SIZE_APP_COMPAT;
2137     }
2138     __android_log_close();
2139     AStatsSocket_close();
2140 
2141     const char* se_info_ptr = se_info.has_value() ? se_info.value().c_str() : nullptr;
2142 
2143     if (selinux_android_setcontext(uid, is_system_server, se_info_ptr, nice_name_ptr) == -1) {
2144         fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
2145                              is_system_server, se_info_ptr, nice_name_ptr));
2146     }
2147 
2148     // Make it easier to debug audit logs by setting the main thread's name to the
2149     // nice name rather than "app_process".
2150     if (nice_name.has_value()) {
2151         SetThreadName(nice_name.value());
2152     } else if (is_system_server) {
2153         SetThreadName("system_server");
2154     }
2155 
2156     // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
2157     UnsetChldSignalHandler();
2158 
2159     if (is_system_server) {
2160         env->CallStaticVoidMethod(gZygoteClass, gCallPostForkSystemServerHooks, runtime_flags);
2161         if (env->ExceptionCheck()) {
2162             fail_fn("Error calling post fork system server hooks.");
2163         }
2164 
2165         // TODO(b/117874058): Remove hardcoded label here.
2166         static const char* kSystemServerLabel = "u:r:system_server:s0";
2167         if (selinux_android_setcon(kSystemServerLabel) != 0) {
2168             fail_fn(CREATE_ERROR("selinux_android_setcon(%s)", kSystemServerLabel));
2169         }
2170     }
2171 
2172     if (is_child_zygote) {
2173         initUnsolSocketToSystemServer();
2174     }
2175 
2176     env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
2177                               is_system_server, is_child_zygote, managed_instruction_set);
2178 
2179     // Reset the process priority to the default value.
2180     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
2181 
2182     if (env->ExceptionCheck()) {
2183         fail_fn("Error calling post fork hooks.");
2184     }
2185 }
2186 
GetEffectiveCapabilityMask(JNIEnv * env)2187 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
2188     __user_cap_header_struct capheader;
2189     memset(&capheader, 0, sizeof(capheader));
2190     capheader.version = _LINUX_CAPABILITY_VERSION_3;
2191     capheader.pid = 0;
2192 
2193     __user_cap_data_struct capdata[2];
2194     if (capget(&capheader, &capdata[0]) == -1) {
2195         ALOGE("capget failed: %s", strerror(errno));
2196         RuntimeAbort(env, __LINE__, "capget failed");
2197     }
2198 
2199     return capdata[0].effective | (static_cast<uint64_t>(capdata[1].effective) << 32);
2200 }
2201 
CalculateBoundingCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids)2202 static jlong CalculateBoundingCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids) {
2203     jlong capabilities = 0;
2204 
2205     /*
2206      * Grant CAP_SYS_NICE to CapInh/CapPrm/CapBnd for processes that can spawn
2207      * VMs.  This enables processes to execve on binaries with elevated
2208      * capabilities if its file capability bits are set. This does not grant
2209      * capability to the parent process(that spawns the VM) as the effective
2210      * bits are not set.
2211      */
2212     if (MatchGid(env, gids, gid, AID_VIRTUALMACHINE)) {
2213         capabilities |= (1LL << CAP_SYS_NICE);
2214     }
2215 
2216     return capabilities;
2217 }
2218 
CalculateCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids,bool is_child_zygote)2219 static jlong CalculateCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids,
2220                                    bool is_child_zygote) {
2221   jlong capabilities = 0;
2222 
2223   /*
2224    *  Grant the following capabilities to the Bluetooth user:
2225    *    - CAP_WAKE_ALARM
2226    *    - CAP_NET_ADMIN
2227    *    - CAP_NET_RAW
2228    *    - CAP_NET_BIND_SERVICE (for DHCP client functionality)
2229    *    - CAP_SYS_NICE (for setting RT priority for audio-related threads)
2230    */
2231 
2232   if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
2233     capabilities |= (1LL << CAP_WAKE_ALARM);
2234     capabilities |= (1LL << CAP_NET_ADMIN);
2235     capabilities |= (1LL << CAP_NET_RAW);
2236     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2237     capabilities |= (1LL << CAP_SYS_NICE);
2238   }
2239 
2240   if (multiuser_get_app_id(uid) == AID_NETWORK_STACK) {
2241     capabilities |= (1LL << CAP_WAKE_ALARM);
2242     capabilities |= (1LL << CAP_NET_ADMIN);
2243     capabilities |= (1LL << CAP_NET_BROADCAST);
2244     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2245     capabilities |= (1LL << CAP_NET_RAW);
2246   }
2247 
2248   /*
2249    * Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
2250    */
2251 
2252   if (MatchGid(env, gids, gid, AID_WAKELOCK)) {
2253     capabilities |= (1LL << CAP_BLOCK_SUSPEND);
2254   }
2255 
2256   /*
2257    * Grant child Zygote processes the following capabilities:
2258    *   - CAP_SETUID (change UID of child processes)
2259    *   - CAP_SETGID (change GID of child processes)
2260    *   - CAP_SETPCAP (change capabilities of child processes)
2261    */
2262 
2263   if (is_child_zygote) {
2264     capabilities |= (1LL << CAP_SETUID);
2265     capabilities |= (1LL << CAP_SETGID);
2266     capabilities |= (1LL << CAP_SETPCAP);
2267   }
2268 
2269   /*
2270    * Containers run without some capabilities, so drop any caps that are not
2271    * available.
2272    */
2273 
2274   return capabilities & GetEffectiveCapabilityMask(env);
2275 }
2276 
2277 /**
2278  * Adds the given information about a newly created unspecialized app
2279  * processes to the Zygote's USAP table.
2280  *
2281  * @param usap_pid  Process ID of the newly created USAP
2282  * @param read_pipe_fd  File descriptor for the read end of the USAP
2283  * reporting pipe.  Used in the ZygoteServer poll loop to track USAP
2284  * specialization.
2285  */
AddUsapTableEntry(pid_t usap_pid,int read_pipe_fd)2286 static void AddUsapTableEntry(pid_t usap_pid, int read_pipe_fd) {
2287   static int sUsapTableInsertIndex = 0;
2288 
2289   int search_index = sUsapTableInsertIndex;
2290   do {
2291     if (gUsapTable[search_index].SetIfInvalid(usap_pid, read_pipe_fd)) {
2292       ++gUsapPoolCount;
2293 
2294       // Start our next search right after where we finished this one.
2295       sUsapTableInsertIndex = (search_index + 1) % gUsapTable.size();
2296 
2297       return;
2298     }
2299 
2300     search_index = (search_index + 1) % gUsapTable.size();
2301   } while (search_index != sUsapTableInsertIndex);
2302 
2303   // Much like money in the banana stand, there should always be an entry
2304   // in the USAP table.
2305   __builtin_unreachable();
2306 }
2307 
2308 /**
2309  * Invalidates the entry in the USAPTable corresponding to the provided
2310  * process ID if it is present.  If an entry was removed the USAP pool
2311  * count is decremented. May be called from signal handler.
2312  *
2313  * @param usap_pid  Process ID of the USAP entry to invalidate
2314  * @return True if an entry was invalidated; false otherwise
2315  */
RemoveUsapTableEntry(pid_t usap_pid)2316 static bool RemoveUsapTableEntry(pid_t usap_pid) {
2317   for (UsapTableEntry& entry : gUsapTable) {
2318     if (entry.ClearForPID(usap_pid)) {
2319       --gUsapPoolCount;
2320       return true;
2321     }
2322   }
2323 
2324   return false;
2325 }
2326 
2327 /**
2328  * @return A vector of the read pipe FDs for each of the active USAPs.
2329  */
MakeUsapPipeReadFDVector()2330 std::vector<int> MakeUsapPipeReadFDVector() {
2331   std::vector<int> fd_vec;
2332   fd_vec.reserve(gUsapTable.size());
2333 
2334   for (UsapTableEntry& entry : gUsapTable) {
2335     auto entry_values = entry.GetValues();
2336 
2337     if (entry_values.has_value()) {
2338       fd_vec.push_back(entry_values.value().read_pipe_fd);
2339     }
2340   }
2341 
2342   return fd_vec;
2343 }
2344 
UnmountStorageOnInit(JNIEnv * env)2345 static void UnmountStorageOnInit(JNIEnv* env) {
2346   // Zygote process unmount root storage space initially before every child processes are forked.
2347   // Every forked child processes (include SystemServer) only mount their own root storage space
2348   // and no need unmount storage operation in MountEmulatedStorage method.
2349   // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
2350 
2351   // See storage config details at http://source.android.com/tech/storage/
2352   // Create private mount namespace shared by all children
2353   if (unshare(CLONE_NEWNS) == -1) {
2354     RuntimeAbort(env, __LINE__, "Failed to unshare()");
2355     return;
2356   }
2357 
2358   // Mark rootfs as being MS_SLAVE so that changes from default
2359   // namespace only flow into our children.
2360   if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
2361     RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
2362     return;
2363   }
2364 
2365   // Create a staging tmpfs that is shared by our children; they will
2366   // bind mount storage into their respective private namespaces, which
2367   // are isolated from each other.
2368   const char* target_base = getenv("EMULATED_STORAGE_TARGET");
2369   if (target_base != nullptr) {
2370 #define STRINGIFY_UID(x) __STRING(x)
2371     if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
2372               "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
2373       ALOGE("Failed to mount tmpfs to %s", target_base);
2374       RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
2375       return;
2376     }
2377 #undef STRINGIFY_UID
2378   }
2379 
2380   UnmountTree("/storage");
2381 }
2382 
2383 }  // anonymous namespace
2384 
2385 namespace android {
2386 
2387 /**
2388  * A failure function used to report fatal errors to the managed runtime.  This
2389  * function is often curried with the process name information and then passed
2390  * to called functions.
2391  *
2392  * @param env  Managed runtime environment
2393  * @param process_name  A native representation of the process name
2394  * @param managed_process_name  A managed representation of the process name
2395  * @param msg  The error message to be reported
2396  */
2397 [[noreturn]]
ZygoteFailure(JNIEnv * env,const char * process_name,jstring managed_process_name,const std::string & msg)2398 void zygote::ZygoteFailure(JNIEnv* env,
2399                            const char* process_name,
2400                            jstring managed_process_name,
2401                            const std::string& msg) {
2402   std::unique_ptr<ScopedUtfChars> scoped_managed_process_name_ptr = nullptr;
2403   if (managed_process_name != nullptr) {
2404     scoped_managed_process_name_ptr.reset(new ScopedUtfChars(env, managed_process_name));
2405     if (scoped_managed_process_name_ptr->c_str() != nullptr) {
2406       process_name = scoped_managed_process_name_ptr->c_str();
2407     }
2408   }
2409 
2410   const std::string& error_msg =
2411       (process_name == nullptr || process_name[0] == '\0') ?
2412       msg : StringPrintf("(%s) %s", process_name, msg.c_str());
2413 
2414   env->FatalError(error_msg.c_str());
2415   __builtin_unreachable();
2416 }
2417 
// Set of file descriptors that the fork entry points append to the
// fds_to_ignore list passed to ForkCommon.  Only consulted when the pointer is
// non-null and gPreloadFdsExtracted is true.
static std::set<int>* gPreloadFds = nullptr;
// Gate for gPreloadFds: the set is used by the fork entry points only once
// this flag has been set.
static bool gPreloadFdsExtracted = false;
2420 
2421 // Utility routine to fork a process from the zygote.
2422 NO_STACK_PROTECTOR
ForkCommon(JNIEnv * env,bool is_system_server,const std::vector<int> & fds_to_close,const std::vector<int> & fds_to_ignore,bool is_priority_fork,bool purge)2423 pid_t zygote::ForkCommon(JNIEnv* env, bool is_system_server,
2424                          const std::vector<int>& fds_to_close,
2425                          const std::vector<int>& fds_to_ignore,
2426                          bool is_priority_fork,
2427                          bool purge) {
2428   ATRACE_CALL();
2429   if (is_priority_fork) {
2430     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2431   }
2432 
2433   SetSignalHandlers();
2434 
2435   // Curry a failure function.
2436   auto fail_fn = std::bind(zygote::ZygoteFailure, env,
2437                            is_system_server ? "system_server" : "zygote",
2438                            nullptr, _1);
2439 
2440   // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
2441   // log, which would result in the logging FDs we close being reopened.
2442   // This would cause failures because the FDs are not allowlisted.
2443   //
2444   // Note that the zygote process is single threaded at this point.
2445   BlockSignal(SIGCHLD, fail_fn);
2446 
2447   // Close any logging related FDs before we start evaluating the list of
2448   // file descriptors.
2449   __android_log_close();
2450   AStatsSocket_close();
2451 
2452   // If this is the first fork for this zygote, create the open FD table,
2453   // verifying that files are of supported type and allowlisted.  Otherwise (not
2454   // the first fork), check that the open files have not changed.  Newly open
2455   // files are not expected, and will be disallowed in the future.  Currently
2456   // they are allowed if they pass the same checks as in the
2457   // FileDescriptorTable::Create() above.
2458   if (gOpenFdTable == nullptr) {
2459     gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, fail_fn);
2460   } else {
2461     gOpenFdTable->Restat(fds_to_ignore, fail_fn);
2462   }
2463 
2464   android_fdsan_error_level fdsan_error_level = android_fdsan_get_error_level();
2465 
2466   if (purge) {
2467     // Purge unused native memory in an attempt to reduce the amount of false
2468     // sharing with the child process.  By reducing the size of the libc_malloc
2469     // region shared with the child process we reduce the number of pages that
2470     // transition to the private-dirty state when malloc adjusts the meta-data
2471     // on each of the pages it is managing after the fork.
2472     if (mallopt(M_PURGE_ALL, 0) != 1) {
2473       mallopt(M_PURGE, 0);
2474     }
2475   }
2476 
2477   pid_t pid = fork();
2478 
2479   if (pid == 0) {
2480     if (is_priority_fork) {
2481       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2482     } else {
2483       setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MIN);
2484     }
2485 
2486 #if defined(__BIONIC__) && !defined(NO_RESET_STACK_PROTECTOR)
2487     // Reset the stack guard for the new process.
2488     android_reset_stack_guards();
2489 #endif
2490 
2491     // The child process.
2492     PreApplicationInit();
2493 
2494     // Clean up any descriptors which must be closed immediately
2495     DetachDescriptors(env, fds_to_close, fail_fn);
2496 
2497     // Invalidate the entries in the USAP table.
2498     ClearUsapTable();
2499 
2500     // Re-open all remaining open file descriptors so that they aren't shared
2501     // with the zygote across a fork.
2502     gOpenFdTable->ReopenOrDetach(fail_fn);
2503 
2504     // Turn fdsan back on.
2505     android_fdsan_set_error_level(fdsan_error_level);
2506 
2507     // Reset the fd to the unsolicited zygote socket
2508     gSystemServerSocketFd = -1;
2509   } else if (pid == -1) {
2510     ALOGE("Failed to fork child process: %s (%d)", strerror(errno), errno);
2511   } else {
2512     ALOGD("Forked child process %d", pid);
2513   }
2514 
2515   // We blocked SIGCHLD prior to a fork, we unblock it here.
2516   UnblockSignal(SIGCHLD, fail_fn);
2517 
2518   if (is_priority_fork && pid != 0) {
2519     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
2520   }
2521 
2522   return pid;
2523 }
2524 
com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv *,jclass)2525 static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
2526   PreApplicationInit();
2527 }
2528 
2529 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkAndSpecialize(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jintArray managed_fds_to_close,jintArray managed_fds_to_ignore,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs,jboolean mount_sysprop_overrides)2530 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
2531         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2532         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2533         jintArray managed_fds_to_close, jintArray managed_fds_to_ignore, jboolean is_child_zygote,
2534         jstring instruction_set, jstring app_data_dir, jboolean is_top_app,
2535         jobjectArray pkg_data_info_list, jobjectArray allowlisted_data_info_list,
2536         jboolean mount_data_dirs, jboolean mount_storage_dirs, jboolean mount_sysprop_overrides) {
2537     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2538     jlong bounding_capabilities = CalculateBoundingCapabilities(env, uid, gid, gids);
2539 
2540     if (UNLIKELY(managed_fds_to_close == nullptr)) {
2541       zygote::ZygoteFailure(env, "zygote", nice_name,
2542                             "Zygote received a null fds_to_close vector.");
2543     }
2544 
2545     std::vector<int> fds_to_close =
2546         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_close).value();
2547     std::vector<int> fds_to_ignore =
2548         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_ignore)
2549             .value_or(std::vector<int>());
2550 
2551     std::vector<int> usap_pipes = MakeUsapPipeReadFDVector();
2552 
2553     fds_to_close.insert(fds_to_close.end(), usap_pipes.begin(), usap_pipes.end());
2554     fds_to_ignore.insert(fds_to_ignore.end(), usap_pipes.begin(), usap_pipes.end());
2555 
2556     fds_to_close.push_back(gUsapPoolSocketFD);
2557 
2558     if (gUsapPoolEventFD != -1) {
2559       fds_to_close.push_back(gUsapPoolEventFD);
2560       fds_to_ignore.push_back(gUsapPoolEventFD);
2561     }
2562 
2563     if (gSystemServerSocketFd != -1) {
2564         fds_to_close.push_back(gSystemServerSocketFd);
2565         fds_to_ignore.push_back(gSystemServerSocketFd);
2566     }
2567 
2568     if (gPreloadFds && gPreloadFdsExtracted) {
2569         fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2570     }
2571 
2572     pid_t pid = zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close, fds_to_ignore,
2573                                    true);
2574 
2575     if (pid == 0) {
2576         SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2577                          bounding_capabilities, mount_external, se_info, nice_name, false,
2578                          is_child_zygote == JNI_TRUE, instruction_set, app_data_dir,
2579                          is_top_app == JNI_TRUE, pkg_data_info_list, allowlisted_data_info_list,
2580                          mount_data_dirs == JNI_TRUE, mount_storage_dirs == JNI_TRUE,
2581                          mount_sysprop_overrides == JNI_TRUE);
2582     }
2583     return pid;
2584 }
2585 
2586 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkSystemServer(JNIEnv * env,jclass,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities)2587 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
2588         JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
2589         jint runtime_flags, jobjectArray rlimits, jlong permitted_capabilities,
2590         jlong effective_capabilities) {
2591   ATRACE_CALL();
2592   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2593                    fds_to_ignore(fds_to_close);
2594 
2595   fds_to_close.push_back(gUsapPoolSocketFD);
2596 
2597   if (gUsapPoolEventFD != -1) {
2598     fds_to_close.push_back(gUsapPoolEventFD);
2599     fds_to_ignore.push_back(gUsapPoolEventFD);
2600   }
2601 
2602   if (gSystemServerSocketFd != -1) {
2603       fds_to_close.push_back(gSystemServerSocketFd);
2604       fds_to_ignore.push_back(gSystemServerSocketFd);
2605   }
2606 
2607   pid_t pid = zygote::ForkCommon(env, true,
2608                                  fds_to_close,
2609                                  fds_to_ignore,
2610                                  true);
2611   if (pid == 0) {
2612       // System server prcoess does not need data isolation so no need to
2613       // know pkg_data_info_list.
2614       SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, permitted_capabilities,
2615                        effective_capabilities, 0, MOUNT_EXTERNAL_DEFAULT, nullptr, nullptr, true,
2616                        false, nullptr, nullptr, /* is_top_app= */ false,
2617                        /* pkg_data_info_list */ nullptr,
2618                        /* allowlisted_data_info_list */ nullptr, false, false, false);
2619   } else if (pid > 0) {
2620       // The zygote process checks whether the child process has died or not.
2621       ALOGI("System server process %d has been created", pid);
2622       gSystemServerPid = pid;
2623       // There is a slight window that the system server process has crashed
2624       // but it went unnoticed because we haven't published its pid yet. So
2625       // we recheck here just to make sure that all is well.
2626       int status;
2627       if (waitpid(pid, &status, WNOHANG) == pid) {
2628           ALOGE("System server process %d has died. Restarting Zygote!", pid);
2629           RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
2630       }
2631 
2632       if (UsePerAppMemcg()) {
2633           // Assign system_server to the correct memory cgroup.
2634           // Not all devices mount memcg so check if it is mounted first
2635           // to avoid unnecessarily printing errors and denials in the logs.
2636           if (!SetTaskProfiles(pid, std::vector<std::string>{"SystemMemoryProcess"})) {
2637               ALOGE("couldn't add process %d into system memcg group", pid);
2638           }
2639       }
2640   }
2641   return pid;
2642 }
2643 
2644 /**
2645  * A JNI function that forks an unspecialized app process from the Zygote while
2646  * ensuring proper file descriptor hygiene.
2647  *
2648  * @param env  Managed runtime environment
2649  * @param read_pipe_fd  The read FD for the USAP reporting pipe.  Manually closed by the child
2650  * in managed code. -1 indicates none.
2651  * @param write_pipe_fd  The write FD for the USAP reporting pipe.  Manually closed by the
2652  * zygote in managed code. -1 indicates none.
2653  * @param managed_session_socket_fds  A list of anonymous session sockets that must be ignored by
2654  * the FD hygiene code and automatically "closed" in the new USAP.
2655  * @param args_known Arguments for specialization are available; no need to read from a socket
2656  * @param is_priority_fork  Controls the nice level assigned to the newly created process
2657  * @return child pid in the parent, 0 in the child
2658  */
2659 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkApp(JNIEnv * env,jclass,jint read_pipe_fd,jint write_pipe_fd,jintArray managed_session_socket_fds,jboolean args_known,jboolean is_priority_fork)2660 static jint com_android_internal_os_Zygote_nativeForkApp(JNIEnv* env,
2661                                                          jclass,
2662                                                          jint read_pipe_fd,
2663                                                          jint write_pipe_fd,
2664                                                          jintArray managed_session_socket_fds,
2665                                                          jboolean args_known,
2666                                                          jboolean is_priority_fork) {
2667   ATRACE_CALL();
2668   std::vector<int> session_socket_fds =
2669       ExtractJIntArray(env, "USAP", nullptr, managed_session_socket_fds)
2670           .value_or(std::vector<int>());
2671   return zygote::forkApp(env, read_pipe_fd, write_pipe_fd, session_socket_fds,
2672                             args_known == JNI_TRUE, is_priority_fork == JNI_TRUE, true);
2673 }
2674 
2675 NO_STACK_PROTECTOR
forkApp(JNIEnv * env,int read_pipe_fd,int write_pipe_fd,const std::vector<int> & session_socket_fds,bool args_known,bool is_priority_fork,bool purge)2676 int zygote::forkApp(JNIEnv* env,
2677                     int read_pipe_fd,
2678                     int write_pipe_fd,
2679                     const std::vector<int>& session_socket_fds,
2680                     bool args_known,
2681                     bool is_priority_fork,
2682                     bool purge) {
2683   ATRACE_CALL();
2684 
2685   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2686                    fds_to_ignore(fds_to_close);
2687 
2688   fds_to_close.push_back(gZygoteSocketFD);
2689   if (gSystemServerSocketFd != -1) {
2690       fds_to_close.push_back(gSystemServerSocketFd);
2691   }
2692   if (args_known) {
2693       fds_to_close.push_back(gUsapPoolSocketFD);
2694   }
2695   fds_to_close.insert(fds_to_close.end(), session_socket_fds.begin(), session_socket_fds.end());
2696 
2697   fds_to_ignore.push_back(gUsapPoolSocketFD);
2698   fds_to_ignore.push_back(gZygoteSocketFD);
2699   if (read_pipe_fd != -1) {
2700       fds_to_ignore.push_back(read_pipe_fd);
2701   }
2702   if (write_pipe_fd != -1) {
2703       fds_to_ignore.push_back(write_pipe_fd);
2704   }
2705   fds_to_ignore.insert(fds_to_ignore.end(), session_socket_fds.begin(), session_socket_fds.end());
2706 
2707   if (gUsapPoolEventFD != -1) {
2708       fds_to_close.push_back(gUsapPoolEventFD);
2709       fds_to_ignore.push_back(gUsapPoolEventFD);
2710   }
2711   if (gSystemServerSocketFd != -1) {
2712       if (args_known) {
2713           fds_to_close.push_back(gSystemServerSocketFd);
2714       }
2715       fds_to_ignore.push_back(gSystemServerSocketFd);
2716   }
2717   if (gPreloadFds && gPreloadFdsExtracted) {
2718       fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2719   }
2720 
2721   return zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close,
2722                             fds_to_ignore, is_priority_fork == JNI_TRUE, purge);
2723 }
2724 
com_android_internal_os_Zygote_nativeAllowFileAcrossFork(JNIEnv * env,jclass,jstring path)2725 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
2726         JNIEnv* env, jclass, jstring path) {
2727     ScopedUtfChars path_native(env, path);
2728     const char* path_cstr = path_native.c_str();
2729     if (!path_cstr) {
2730         RuntimeAbort(env, __LINE__, "path_cstr == nullptr");
2731     }
2732     FileDescriptorAllowlist::Get()->Allow(path_cstr);
2733 }
2734 
com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(JNIEnv * env,jclass,jint uidGidMin,jint uidGidMax)2735 static void com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(
2736         JNIEnv* env, jclass, jint uidGidMin, jint uidGidMax) {
2737   if (!gIsSecurityEnforced) {
2738     ALOGI("seccomp disabled by setenforce 0");
2739     return;
2740   }
2741 
2742   bool installed = install_setuidgid_seccomp_filter(uidGidMin, uidGidMax);
2743   if (!installed) {
2744       RuntimeAbort(env, __LINE__, "Could not install setuid/setgid seccomp filter.");
2745   }
2746 }
2747 
2748 /**
2749  * Called from an unspecialized app process to specialize the process for a
2750  * given application.
2751  *
2752  * @param env  Managed runtime environment
2753  * @param uid  User ID of the new application
2754  * @param gid  Group ID of the new application
2755  * @param gids  Extra groups that the process belongs to
2756  * @param runtime_flags  Flags for changing the behavior of the managed runtime
2757  * @param rlimits  Resource limits
2758  * @param mount_external  The mode (read/write/normal) that external storage will be mounted with
2759  * @param se_info  SELinux policy information
2760  * @param nice_name  New name for this process
2761  * @param is_child_zygote  If the process is to become a WebViewZygote
2762  * @param instruction_set  The instruction set expected/requested by the new application
2763  * @param app_data_dir  Path to the application's data directory
2764  * @param is_top_app  If the process is for top (high priority) application
2765  */
com_android_internal_os_Zygote_nativeSpecializeAppProcess(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs,jboolean mount_sysprop_overrides)2766 static void com_android_internal_os_Zygote_nativeSpecializeAppProcess(
2767         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2768         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2769         jboolean is_child_zygote, jstring instruction_set, jstring app_data_dir,
2770         jboolean is_top_app, jobjectArray pkg_data_info_list,
2771         jobjectArray allowlisted_data_info_list, jboolean mount_data_dirs,
2772         jboolean mount_storage_dirs, jboolean mount_sysprop_overrides) {
2773     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2774     jlong bounding_capabilities = CalculateBoundingCapabilities(env, uid, gid, gids);
2775 
2776     SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2777                      bounding_capabilities, mount_external, se_info, nice_name, false,
2778                      is_child_zygote == JNI_TRUE, instruction_set, app_data_dir,
2779                      is_top_app == JNI_TRUE, pkg_data_info_list, allowlisted_data_info_list,
2780                      mount_data_dirs == JNI_TRUE, mount_storage_dirs == JNI_TRUE,
2781                      mount_sysprop_overrides == JNI_TRUE);
2782 }
2783 
2784 /**
2785  * A helper method for fetching socket file descriptors that were opened by init from the
2786  * environment.
2787  *
2788  * @param env  Managed runtime environment
2789  * @param is_primary  If this process is the primary or secondary Zygote; used to compute the name
2790  * of the environment variable storing the file descriptors.
2791  */
com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv * env,jclass,jboolean is_primary)2792 static void com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv* env, jclass,
2793                                                                  jboolean is_primary) {
2794   /*
2795    * Obtain file descriptors created by init from the environment.
2796    */
2797 
2798   gZygoteSocketFD =
2799       android_get_control_socket(is_primary ? "zygote" : "zygote_secondary");
2800   if (gZygoteSocketFD >= 0) {
2801     ALOGV("Zygote:zygoteSocketFD = %d", gZygoteSocketFD);
2802   } else {
2803     ALOGE("Unable to fetch Zygote socket file descriptor");
2804   }
2805 
2806   gUsapPoolSocketFD =
2807       android_get_control_socket(is_primary ? "usap_pool_primary" : "usap_pool_secondary");
2808   if (gUsapPoolSocketFD >= 0) {
2809     ALOGV("Zygote:usapPoolSocketFD = %d", gUsapPoolSocketFD);
2810   } else {
2811     ALOGE("Unable to fetch USAP pool socket file descriptor");
2812   }
2813 
2814   initUnsolSocketToSystemServer();
2815 
2816   /*
2817    * Security Initialization
2818    */
2819 
2820   // security_getenforce is not allowed on app process. Initialize and cache
2821   // the value before zygote forks.
2822   gIsSecurityEnforced = security_getenforce();
2823 
2824   selinux_android_seapp_context_init();
2825 
2826   /*
2827    * Storage Initialization
2828    */
2829 
2830   UnmountStorageOnInit(env);
2831 
2832   /*
2833    * Performance Initialization
2834    */
2835 
2836   if (!SetTaskProfiles(0, {})) {
2837     zygote::ZygoteFailure(env, "zygote", nullptr, "Zygote SetTaskProfiles failed");
2838   }
2839 }
2840 
2841 /**
2842  * @param env  Managed runtime environment
2843  * @return  A managed array of raw file descriptors for the read ends of the USAP reporting
2844  * pipes.
2845  */
com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv * env,jclass)2846 static jintArray com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv* env, jclass) {
2847   std::vector<int> usap_fds = MakeUsapPipeReadFDVector();
2848 
2849   jintArray managed_usap_fds = env->NewIntArray(usap_fds.size());
2850   env->SetIntArrayRegion(managed_usap_fds, 0, usap_fds.size(), usap_fds.data());
2851 
2852   return managed_usap_fds;
2853 }
2854 
2855 /*
2856  * Add the given pid and file descriptor to the Usap table. CriticalNative method.
2857  */
com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid,jint read_pipe_fd)2858 static void com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid, jint read_pipe_fd) {
2859   AddUsapTableEntry(pid, read_pipe_fd);
2860 }
2861 
2862 /**
2863  * A JNI wrapper around RemoveUsapTableEntry. CriticalNative method.
2864  *
2865  * @param env  Managed runtime environment
2866  * @param usap_pid  Process ID of the USAP entry to invalidate
2867  * @return  True if an entry was invalidated; false otherwise.
2868  */
com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid)2869 static jboolean com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid) {
2870   return RemoveUsapTableEntry(usap_pid);
2871 }
2872 
2873 /**
2874  * Creates the USAP pool event FD if it doesn't exist and returns it.  This is used by the
2875  * ZygoteServer poll loop to know when to re-fill the USAP pool.
2876  *
2877  * @param env  Managed runtime environment
2878  * @return A raw event file descriptor used to communicate (from the signal handler) when the
2879  * Zygote receives a SIGCHLD for a USAP
2880  */
com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv * env,jclass)2881 static jint com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv* env, jclass) {
2882   if (gUsapPoolEventFD == -1) {
2883     if ((gUsapPoolEventFD = eventfd(0, 0)) == -1) {
2884       zygote::ZygoteFailure(env, "zygote", nullptr,
2885                             StringPrintf("Unable to create eventfd: %s", strerror(errno)));
2886     }
2887   }
2888 
2889   return gUsapPoolEventFD;
2890 }
2891 
2892 /**
2893  * @param env  Managed runtime environment
2894  * @return The number of USAPs currently in the USAP pool
2895  */
com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv * env,jclass)2896 static jint com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv* env, jclass) {
2897   return gUsapPoolCount;
2898 }
2899 
2900 /**
2901  * Kills all processes currently in the USAP pool and closes their read pipe
2902  * FDs.
2903  *
2904  * @param env  Managed runtime environment
2905  */
com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv * env,jclass)2906 static void com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv* env, jclass) {
2907   for (auto& entry : gUsapTable) {
2908     auto entry_storage = entry.GetValues();
2909 
2910     if (entry_storage.has_value()) {
2911       kill(entry_storage.value().pid, SIGTERM);
2912 
2913       // Clean up the USAP table entry here.  This avoids a potential race
2914       // where a newly created USAP might not be able to find a valid table
2915       // entry if signal handler (which would normally do the cleanup) doesn't
2916       // run between now and when the new process is created.
2917 
2918       close(entry_storage.value().read_pipe_fd);
2919 
2920       // Avoid a second atomic load by invalidating instead of clearing.
2921       entry.Invalidate();
2922       --gUsapPoolCount;
2923     }
2924   }
2925 }
2926 
com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv * env,jclass)2927 static void com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv* env, jclass) {
2928   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2929   BlockSignal(SIGTERM, fail_fn);
2930 }
2931 
com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv * env,jclass)2932 static void com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv* env, jclass) {
2933   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2934   UnblockSignal(SIGTERM, fail_fn);
2935 }
2936 
com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv * env,jclass)2937 static void com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv* env, jclass) {
2938   setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2939 }
2940 
com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv * env,jclass,jbyteArray in,jint length,jintArray out)2941 static jint com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv* env, jclass, jbyteArray in,
2942                                                               jint length, jintArray out) {
2943     if (length != sizeof(struct UnsolicitedZygoteMessageSigChld)) {
2944         // Apparently it's not the message we are expecting.
2945         return -1;
2946     }
2947     if (in == nullptr || out == nullptr) {
2948         // Invalid parameter
2949         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2950         return -1;
2951     }
2952     ScopedByteArrayRO source(env, in);
2953     if (source.size() < static_cast<size_t>(length)) {
2954         // Invalid parameter
2955         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2956         return -1;
2957     }
2958     const struct UnsolicitedZygoteMessageSigChld* msg =
2959             reinterpret_cast<const struct UnsolicitedZygoteMessageSigChld*>(source.get());
2960 
2961     switch (msg->header.type) {
2962         case UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD: {
2963             ScopedIntArrayRW buf(env, out);
2964             if (buf.size() != 3) {
2965                 jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2966                 return UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED;
2967             }
2968             buf[0] = msg->payload.pid;
2969             buf[1] = msg->payload.uid;
2970             buf[2] = msg->payload.status;
2971             return 3;
2972         }
2973         default:
2974             break;
2975     }
2976     return -1;
2977 }
2978 
com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv * env,jclass)2979 static jboolean com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv* env, jclass) {
2980 #if defined(__aarch64__)
2981   return mte_supported();
2982 #else
2983   return false;
2984 #endif
2985 }
2986 
com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv * env,jclass)2987 static jboolean com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv* env, jclass) {
2988 #ifdef __aarch64__
2989   int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2990   return res >= 0 && res & PR_TAGGED_ADDR_ENABLE;
2991 #else
2992   return false;
2993 #endif
2994 }
2995 
com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv * env,jclass)2996 static jint com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv* env, jclass) {
2997 #if defined(__aarch64__)
2998   int level = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2999   if (level < 0) {
3000     ALOGE("Failed to get memory tag level: %s", strerror(errno));
3001     return 0;
3002   } else if (!(level & PR_TAGGED_ADDR_ENABLE)) {
3003     return 0;
3004   }
3005   // TBI is only possible on non-MTE hardware.
3006   if (!mte_supported()) {
3007     return MEMORY_TAG_LEVEL_TBI;
3008   }
3009 
3010   switch (level & PR_MTE_TCF_MASK) {
3011     case PR_MTE_TCF_NONE:
3012       return 0;
3013     case PR_MTE_TCF_SYNC:
3014       return MEMORY_TAG_LEVEL_SYNC;
3015     case PR_MTE_TCF_ASYNC:
3016     case PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC:
3017       return MEMORY_TAG_LEVEL_ASYNC;
3018     default:
3019       ALOGE("Unknown memory tagging level: %i", level);
3020       return 0;
3021   }
3022 #else // defined(__aarch64__)
3023   return 0;
3024 #endif // defined(__aarch64__)
3025 }
3026 
com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv * env,jclass)3027 static void com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv* env, jclass) {
3028     // Ignore invocations when too early or too late.
3029     if (gPreloadFds) {
3030         return;
3031     }
3032 
3033     // App Zygote Preload starts soon. Save FDs remaining open.  After the
3034     // preload finishes newly open files will be determined.
3035     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
3036     gPreloadFds = GetOpenFds(fail_fn).release();
3037 }
3038 
com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv * env,jclass)3039 static void com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv* env, jclass) {
3040     // Ignore invocations when too early or too late.
3041     if (!gPreloadFds || gPreloadFdsExtracted) {
3042         return;
3043     }
3044 
3045     // Find the newly open FDs, if any.
3046     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
3047     std::unique_ptr<std::set<int>> current_fds = GetOpenFds(fail_fn);
3048     auto difference = std::make_unique<std::set<int>>();
3049     std::set_difference(current_fds->begin(), current_fds->end(), gPreloadFds->begin(),
3050                         gPreloadFds->end(), std::inserter(*difference, difference->end()));
3051     delete gPreloadFds;
3052     gPreloadFds = difference.release();
3053     gPreloadFdsExtracted = true;
3054 }
3055 
3056 static const JNINativeMethod gMethods[] = {
nativeForkAndSpecialize(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)3057         {"nativeForkAndSpecialize",
3058          "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/"
3059          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)I",
3060          (void*)com_android_internal_os_Zygote_nativeForkAndSpecialize},
nativeForkSystemServer(II[II[[IJJ)3061         {"nativeForkSystemServer", "(II[II[[IJJ)I",
3062          (void*)com_android_internal_os_Zygote_nativeForkSystemServer},
nativeAllowFileAcrossFork(Ljava/lang/String;)3063         {"nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
3064          (void*)com_android_internal_os_Zygote_nativeAllowFileAcrossFork},
nativePreApplicationInit()3065         {"nativePreApplicationInit", "()V",
3066          (void*)com_android_internal_os_Zygote_nativePreApplicationInit},
nativeInstallSeccompUidGidFilter(II)3067         {"nativeInstallSeccompUidGidFilter", "(II)V",
3068          (void*)com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter},
nativeForkApp(II[IZZ)3069         {"nativeForkApp", "(II[IZZ)I", (void*)com_android_internal_os_Zygote_nativeForkApp},
3070         // @CriticalNative
nativeAddUsapTableEntry(II)3071         {"nativeAddUsapTableEntry", "(II)V",
3072          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
nativeSpecializeAppProcess(II[II[[IILjava/lang/String;Ljava/lang/String;ZLjava/lang/String;Ljava/lang/String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)3073         {"nativeSpecializeAppProcess",
3074          "(II[II[[IILjava/lang/String;Ljava/lang/String;ZLjava/lang/String;Ljava/lang/"
3075          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)V",
3076          (void*)com_android_internal_os_Zygote_nativeSpecializeAppProcess},
nativeInitNativeState(Z)3077         {"nativeInitNativeState", "(Z)V",
3078          (void*)com_android_internal_os_Zygote_nativeInitNativeState},
nativeGetUsapPipeFDs()3079         {"nativeGetUsapPipeFDs", "()[I",
3080          (void*)com_android_internal_os_Zygote_nativeGetUsapPipeFDs},
3081         // @CriticalNative
nativeAddUsapTableEntry(II)3082         {"nativeAddUsapTableEntry", "(II)V",
3083          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
3084         // @CriticalNative
nativeRemoveUsapTableEntry(I)3085         {"nativeRemoveUsapTableEntry", "(I)Z",
3086          (void*)com_android_internal_os_Zygote_nativeRemoveUsapTableEntry},
nativeGetUsapPoolEventFD()3087         {"nativeGetUsapPoolEventFD", "()I",
3088          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolEventFD},
nativeGetUsapPoolCount()3089         {"nativeGetUsapPoolCount", "()I",
3090          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolCount},
nativeEmptyUsapPool()3091         {"nativeEmptyUsapPool", "()V", (void*)com_android_internal_os_Zygote_nativeEmptyUsapPool},
nativeBlockSigTerm()3092         {"nativeBlockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeBlockSigTerm},
nativeUnblockSigTerm()3093         {"nativeUnblockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeUnblockSigTerm},
nativeBoostUsapPriority()3094         {"nativeBoostUsapPriority", "()V",
3095          (void*)com_android_internal_os_Zygote_nativeBoostUsapPriority},
nativeParseSigChld([BI[I)3096         {"nativeParseSigChld", "([BI[I)I",
3097          (void*)com_android_internal_os_Zygote_nativeParseSigChld},
nativeSupportsMemoryTagging()3098         {"nativeSupportsMemoryTagging", "()Z",
3099          (void*)com_android_internal_os_Zygote_nativeSupportsMemoryTagging},
nativeSupportsTaggedPointers()3100         {"nativeSupportsTaggedPointers", "()Z",
3101          (void*)com_android_internal_os_Zygote_nativeSupportsTaggedPointers},
nativeCurrentTaggingLevel()3102         {"nativeCurrentTaggingLevel", "()I",
3103          (void*)com_android_internal_os_Zygote_nativeCurrentTaggingLevel},
nativeMarkOpenedFilesBeforePreload()3104         {"nativeMarkOpenedFilesBeforePreload", "()V",
3105          (void*)com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload},
nativeAllowFilesOpenedByPreload()3106         {"nativeAllowFilesOpenedByPreload", "()V",
3107          (void*)com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload},
3108 };
3109 
register_com_android_internal_os_Zygote(JNIEnv * env)3110 int register_com_android_internal_os_Zygote(JNIEnv* env) {
3111   gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
3112   gCallPostForkSystemServerHooks = GetStaticMethodIDOrDie(env, gZygoteClass,
3113                                                           "callPostForkSystemServerHooks",
3114                                                           "(I)V");
3115   gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
3116                                                    "(IZZLjava/lang/String;)V");
3117 
3118   gZygoteInitClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteInitClassName));
3119   gGetOrCreateSystemServerClassLoader =
3120           GetStaticMethodIDOrDie(env, gZygoteInitClass, "getOrCreateSystemServerClassLoader",
3121                                  "()Ljava/lang/ClassLoader;");
3122   gPrefetchStandaloneSystemServerJars =
3123           GetStaticMethodIDOrDie(env, gZygoteInitClass, "prefetchStandaloneSystemServerJars",
3124                                  "()V");
3125 
3126   RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
3127 
3128   return JNI_OK;
3129 }
3130 }  // namespace android
3131