1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Zygote"
18
19 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20 #include <sys/mount.h>
21 #include <linux/fs.h>
22
23 #include <list>
24 #include <sstream>
25 #include <string>
26
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <inttypes.h>
30 #include <malloc.h>
31 #include <mntent.h>
32 #include <paths.h>
33 #include <signal.h>
34 #include <stdlib.h>
35 #include <sys/capability.h>
36 #include <sys/cdefs.h>
37 #include <sys/personality.h>
38 #include <sys/prctl.h>
39 #include <sys/resource.h>
40 #include <sys/stat.h>
41 #include <sys/time.h>
42 #include <sys/types.h>
43 #include <sys/utsname.h>
44 #include <sys/wait.h>
45 #include <unistd.h>
46
47 #include "android-base/logging.h"
48 #include <android-base/file.h>
49 #include <android-base/stringprintf.h>
50 #include <cutils/fs.h>
51 #include <cutils/multiuser.h>
52 #include <cutils/sched_policy.h>
53 #include <private/android_filesystem_config.h>
54 #include <utils/String8.h>
55 #include <selinux/android.h>
56 #include <seccomp_policy.h>
57 #include <processgroup/processgroup.h>
58
59 #include "core_jni_helpers.h"
60 #include <nativehelper/JNIHelp.h>
61 #include <nativehelper/ScopedLocalRef.h>
62 #include <nativehelper/ScopedPrimitiveArray.h>
63 #include <nativehelper/ScopedUtfChars.h>
64 #include "fd_utils.h"
65
66 #include "nativebridge/native_bridge.h"
67
68 namespace {
69
70 using android::String8;
71 using android::base::StringPrintf;
72 using android::base::WriteStringToFile;
73
// Builds a std::string of the form "<file>:<line>: <formatted message>". The
// arguments are a printf-style format string followed by its values; they are
// forwarded to StringPrintf.
#define CREATE_ERROR(...) StringPrintf("%s:%d: ", __FILE__, __LINE__). \
                                  append(StringPrintf(__VA_ARGS__))

// Pid of the forked system server. Written by
// com_android_internal_os_Zygote_nativeForkSystemServer and compared against
// reaped pids in SigChldHandler. Stays 0 until a system server has been forked.
static pid_t gSystemServerPid = 0;

// JNI-style name of the Java Zygote class.
// NOTE(review): the use of this constant is outside the visible part of the file.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Class and static method handed to CallStaticVoidMethod at the end of the
// child branch of ForkAndSpecializeCommon.
// NOTE(review): their initialization is outside the visible part of the file.
static jclass gZygoteClass;
static jmethodID gCallPostForkChildHooks;

// Cached result of security_getenforce(), stored by
// com_android_internal_os_Zygote_nativeSecurityInit and consulted by
// SetUpSeccompFilter. Defaults to true.
static bool g_is_security_enforced = true;

// Must match values in com.android.internal.os.Zygote.
// MountEmulatedStorage compares its mount_mode argument against these.
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,
  MOUNT_EXTERNAL_DEFAULT = 1,
  MOUNT_EXTERNAL_READ = 2,
  MOUNT_EXTERNAL_WRITE = 3,
};
92
RuntimeAbort(JNIEnv * env,int line,const char * msg)93 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
94 std::ostringstream oss;
95 oss << __FILE__ << ":" << line << ": " << msg;
96 env->FatalError(oss.str().c_str());
97 }
98
99 // This signal handler is for zygote mode, since the zygote must reap its children
SigChldHandler(int)100 static void SigChldHandler(int /*signal_number*/) {
101 pid_t pid;
102 int status;
103
104 // It's necessary to save and restore the errno during this function.
105 // Since errno is stored per thread, changing it here modifies the errno
106 // on the thread on which this signal handler executes. If a signal occurs
107 // between a call and an errno check, it's possible to get the errno set
108 // here.
109 // See b/23572286 for extra information.
110 int saved_errno = errno;
111
112 while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
113 // Log process-death status that we care about. In general it is
114 // not safe to call LOG(...) from a signal handler because of
115 // possible reentrancy. However, we know a priori that the
116 // current implementation of LOG() is safe to call from a SIGCHLD
117 // handler in the zygote process. If the LOG() implementation
118 // changes its locking strategy or its use of syscalls within the
119 // lazy-init critical section, its use here may become unsafe.
120 if (WIFEXITED(status)) {
121 ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
122 } else if (WIFSIGNALED(status)) {
123 ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
124 if (WCOREDUMP(status)) {
125 ALOGI("Process %d dumped core.", pid);
126 }
127 }
128
129 // If the just-crashed process is the system_server, bring down zygote
130 // so that it is restarted by init and system server will be restarted
131 // from there.
132 if (pid == gSystemServerPid) {
133 ALOGE("Exit zygote because system server (%d) has terminated", pid);
134 kill(getpid(), SIGKILL);
135 }
136 }
137
138 // Note that we shouldn't consider ECHILD an error because
139 // the secondary zygote might have no children left to wait for.
140 if (pid < 0 && errno != ECHILD) {
141 ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
142 }
143
144 errno = saved_errno;
145 }
146
147 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
148 // configured very late, because earlier in the runtime we may fork() and
149 // exec() other processes, and we want to waitpid() for those rather than
150 // have them be harvested immediately.
151 //
152 // Ignore SIGHUP because all processes forked by the zygote are in the same
153 // process group as the zygote and we don't want to be notified if we become
154 // an orphaned group and have one or more stopped processes. This is not a
155 // theoretical concern :
156 // - we can become an orphaned group if one of our direct descendants forks
157 // and is subsequently killed before its children.
158 // - crash_dump routinely STOPs the process it's tracing.
159 //
160 // See issues b/71965619 and b/25567761 for further details.
161 //
162 // This ends up being called repeatedly before each fork(), but there's
163 // no real harm in that.
SetSignalHandlers()164 static void SetSignalHandlers() {
165 struct sigaction sig_chld = {};
166 sig_chld.sa_handler = SigChldHandler;
167
168 if (sigaction(SIGCHLD, &sig_chld, NULL) < 0) {
169 ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
170 }
171
172 struct sigaction sig_hup = {};
173 sig_hup.sa_handler = SIG_IGN;
174 if (sigaction(SIGHUP, &sig_hup, NULL) < 0) {
175 ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
176 }
177 }
178
179 // Sets the SIGCHLD handler back to default behavior in zygote children.
UnsetChldSignalHandler()180 static void UnsetChldSignalHandler() {
181 struct sigaction sa;
182 memset(&sa, 0, sizeof(sa));
183 sa.sa_handler = SIG_DFL;
184
185 if (sigaction(SIGCHLD, &sa, NULL) < 0) {
186 ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
187 }
188 }
189
190 // Calls POSIX setgroups() using the int[] object as an argument.
191 // A NULL argument is tolerated.
SetGids(JNIEnv * env,jintArray javaGids,std::string * error_msg)192 static bool SetGids(JNIEnv* env, jintArray javaGids, std::string* error_msg) {
193 if (javaGids == NULL) {
194 return true;
195 }
196
197 ScopedIntArrayRO gids(env, javaGids);
198 if (gids.get() == NULL) {
199 *error_msg = CREATE_ERROR("Getting gids int array failed");
200 return false;
201 }
202 int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
203 if (rc == -1) {
204 *error_msg = CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size());
205 return false;
206 }
207
208 return true;
209 }
210
211 // Sets the resource limits via setrlimit(2) for the values in the
212 // two-dimensional array of integers that's passed in. The second dimension
213 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
214 // treated as an empty array.
SetRLimits(JNIEnv * env,jobjectArray javaRlimits,std::string * error_msg)215 static bool SetRLimits(JNIEnv* env, jobjectArray javaRlimits, std::string* error_msg) {
216 if (javaRlimits == NULL) {
217 return true;
218 }
219
220 rlimit rlim;
221 memset(&rlim, 0, sizeof(rlim));
222
223 for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
224 ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
225 ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
226 if (javaRlimit.size() != 3) {
227 *error_msg = CREATE_ERROR("rlimits array must have a second dimension of size 3");
228 return false;
229 }
230
231 rlim.rlim_cur = javaRlimit[1];
232 rlim.rlim_max = javaRlimit[2];
233
234 int rc = setrlimit(javaRlimit[0], &rlim);
235 if (rc == -1) {
236 *error_msg = CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
237 rlim.rlim_max);
238 return false;
239 }
240 }
241
242 return true;
243 }
244
245 // The debug malloc library needs to know whether it's the zygote or a child.
246 extern "C" int gMallocLeakZygoteChild;
247
PreApplicationInit()248 static void PreApplicationInit() {
249 // The child process sets this to indicate it's not the zygote.
250 gMallocLeakZygoteChild = 1;
251
252 // Set the jemalloc decay time to 1.
253 mallopt(M_DECAY_TIME, 1);
254 }
255
SetUpSeccompFilter(uid_t uid)256 static void SetUpSeccompFilter(uid_t uid) {
257 if (!g_is_security_enforced) {
258 ALOGI("seccomp disabled by setenforce 0");
259 return;
260 }
261
262 // Apply system or app filter based on uid.
263 if (uid >= AID_APP_START) {
264 set_app_seccomp_filter();
265 } else {
266 set_system_seccomp_filter();
267 }
268 }
269
EnableKeepCapabilities(std::string * error_msg)270 static bool EnableKeepCapabilities(std::string* error_msg) {
271 int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
272 if (rc == -1) {
273 *error_msg = CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno));
274 return false;
275 }
276 return true;
277 }
278
DropCapabilitiesBoundingSet(std::string * error_msg)279 static bool DropCapabilitiesBoundingSet(std::string* error_msg) {
280 for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
281 int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
282 if (rc == -1) {
283 if (errno == EINVAL) {
284 ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
285 "your kernel is compiled with file capabilities support");
286 } else {
287 *error_msg = CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno));
288 return false;
289 }
290 }
291 }
292 return true;
293 }
294
SetInheritable(uint64_t inheritable,std::string * error_msg)295 static bool SetInheritable(uint64_t inheritable, std::string* error_msg) {
296 __user_cap_header_struct capheader;
297 memset(&capheader, 0, sizeof(capheader));
298 capheader.version = _LINUX_CAPABILITY_VERSION_3;
299 capheader.pid = 0;
300
301 __user_cap_data_struct capdata[2];
302 if (capget(&capheader, &capdata[0]) == -1) {
303 *error_msg = CREATE_ERROR("capget failed: %s", strerror(errno));
304 return false;
305 }
306
307 capdata[0].inheritable = inheritable;
308 capdata[1].inheritable = inheritable >> 32;
309
310 if (capset(&capheader, &capdata[0]) == -1) {
311 *error_msg = CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno));
312 return false;
313 }
314
315 return true;
316 }
317
SetCapabilities(uint64_t permitted,uint64_t effective,uint64_t inheritable,std::string * error_msg)318 static bool SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
319 std::string* error_msg) {
320 __user_cap_header_struct capheader;
321 memset(&capheader, 0, sizeof(capheader));
322 capheader.version = _LINUX_CAPABILITY_VERSION_3;
323 capheader.pid = 0;
324
325 __user_cap_data_struct capdata[2];
326 memset(&capdata, 0, sizeof(capdata));
327 capdata[0].effective = effective;
328 capdata[1].effective = effective >> 32;
329 capdata[0].permitted = permitted;
330 capdata[1].permitted = permitted >> 32;
331 capdata[0].inheritable = inheritable;
332 capdata[1].inheritable = inheritable >> 32;
333
334 if (capset(&capheader, &capdata[0]) == -1) {
335 *error_msg = CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
336 "failed: %s", permitted, effective, inheritable, strerror(errno));
337 return false;
338 }
339 return true;
340 }
341
SetSchedulerPolicy(std::string * error_msg)342 static bool SetSchedulerPolicy(std::string* error_msg) {
343 errno = -set_sched_policy(0, SP_DEFAULT);
344 if (errno != 0) {
345 *error_msg = CREATE_ERROR("set_sched_policy(0, SP_DEFAULT) failed: %s", strerror(errno));
346 return false;
347 }
348 return true;
349 }
350
UnmountTree(const char * path)351 static int UnmountTree(const char* path) {
352 size_t path_len = strlen(path);
353
354 FILE* fp = setmntent("/proc/mounts", "r");
355 if (fp == NULL) {
356 ALOGE("Error opening /proc/mounts: %s", strerror(errno));
357 return -errno;
358 }
359
360 // Some volumes can be stacked on each other, so force unmount in
361 // reverse order to give us the best chance of success.
362 std::list<std::string> toUnmount;
363 mntent* mentry;
364 while ((mentry = getmntent(fp)) != NULL) {
365 if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
366 toUnmount.push_front(std::string(mentry->mnt_dir));
367 }
368 }
369 endmntent(fp);
370
371 for (auto path : toUnmount) {
372 if (umount2(path.c_str(), MNT_DETACH)) {
373 ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
374 }
375 }
376 return 0;
377 }
378
379 // Create a private mount namespace and bind mount appropriate emulated
380 // storage for the given user.
MountEmulatedStorage(uid_t uid,jint mount_mode,bool force_mount_namespace,std::string * error_msg)381 static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
382 bool force_mount_namespace, std::string* error_msg) {
383 // See storage config details at http://source.android.com/tech/storage/
384
385 String8 storageSource;
386 if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
387 storageSource = "/mnt/runtime/default";
388 } else if (mount_mode == MOUNT_EXTERNAL_READ) {
389 storageSource = "/mnt/runtime/read";
390 } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
391 storageSource = "/mnt/runtime/write";
392 } else if (!force_mount_namespace) {
393 // Sane default of no storage visible
394 return true;
395 }
396
397 // Create a second private mount namespace for our process
398 if (unshare(CLONE_NEWNS) == -1) {
399 *error_msg = CREATE_ERROR("Failed to unshare(): %s", strerror(errno));
400 return false;
401 }
402
403 // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
404 if (mount_mode == MOUNT_EXTERNAL_NONE) {
405 return true;
406 }
407
408 if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
409 NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
410 *error_msg = CREATE_ERROR("Failed to mount %s to /storage: %s",
411 storageSource.string(),
412 strerror(errno));
413 return false;
414 }
415
416 // Mount user-specific symlink helper into place
417 userid_t user_id = multiuser_get_user_id(uid);
418 const String8 userSource(String8::format("/mnt/user/%d", user_id));
419 if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
420 *error_msg = CREATE_ERROR("fs_prepare_dir failed on %s", userSource.string());
421 return false;
422 }
423 if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
424 NULL, MS_BIND, NULL)) == -1) {
425 *error_msg = CREATE_ERROR("Failed to mount %s to /storage/self: %s",
426 userSource.string(),
427 strerror(errno));
428 return false;
429 }
430
431 return true;
432 }
433
// Reports whether the running kernel needs the ADDR_NO_RANDOMIZE workaround.
// Always false on builds where __arm__ is not defined. On __arm__ builds the
// answer is true for kernel releases older than 3.4, and false when the
// release cannot be queried or parsed.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
  struct utsname kernel_info;
  if (uname(&kernel_info) == -1) {
    return false;
  }

  int major;
  int minor;
  if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
    return false;
  }

  // Kernels before 3.4.* need the workaround.
  if (major != 3) {
    return major < 3;
  }
  return minor < 4;
#else
  return false;
#endif
}
453
454 // Utility to close down the Zygote socket file descriptors while
455 // the child is still running as root with Zygote's privileges. Each
456 // descriptor (if any) is closed via dup2(), replacing it with a valid
457 // (open) descriptor to /dev/null.
458
DetachDescriptors(JNIEnv * env,jintArray fdsToClose,std::string * error_msg)459 static bool DetachDescriptors(JNIEnv* env, jintArray fdsToClose, std::string* error_msg) {
460 if (!fdsToClose) {
461 return true;
462 }
463 jsize count = env->GetArrayLength(fdsToClose);
464 ScopedIntArrayRO ar(env, fdsToClose);
465 if (ar.get() == NULL) {
466 *error_msg = "Bad fd array";
467 return false;
468 }
469 jsize i;
470 int devnull;
471 for (i = 0; i < count; i++) {
472 devnull = open("/dev/null", O_RDWR);
473 if (devnull < 0) {
474 *error_msg = std::string("Failed to open /dev/null: ").append(strerror(errno));
475 return false;
476 }
477 ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
478 if (dup2(devnull, ar[i]) < 0) {
479 *error_msg = StringPrintf("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
480 return false;
481 }
482 close(devnull);
483 }
484 return true;
485 }
486
SetThreadName(const char * thread_name)487 void SetThreadName(const char* thread_name) {
488 bool hasAt = false;
489 bool hasDot = false;
490 const char* s = thread_name;
491 while (*s) {
492 if (*s == '.') {
493 hasDot = true;
494 } else if (*s == '@') {
495 hasAt = true;
496 }
497 s++;
498 }
499 const int len = s - thread_name;
500 if (len < 15 || hasAt || !hasDot) {
501 s = thread_name;
502 } else {
503 s = thread_name + len - 15;
504 }
505 // pthread_setname_np fails rather than truncating long strings.
506 char buf[16]; // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
507 strlcpy(buf, s, sizeof(buf)-1);
508 errno = pthread_setname_np(pthread_self(), buf);
509 if (errno != 0) {
510 ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
511 }
512 // Update base::logging default tag.
513 android::base::SetDefaultTag(buf);
514 }
515
// The list of open zygote file descriptors. ForkAndSpecializeCommon creates it
// with FileDescriptorTable::Create while the pointer is still NULL, re-checks
// it with Restat on later calls, and calls ReopenOrDetach on it in the forked
// child.
static FileDescriptorTable* gOpenFdTable = NULL;
518
FillFileDescriptorVector(JNIEnv * env,jintArray java_fds,std::vector<int> * fds,std::string * error_msg)519 static bool FillFileDescriptorVector(JNIEnv* env,
520 jintArray java_fds,
521 std::vector<int>* fds,
522 std::string* error_msg) {
523 CHECK(fds != nullptr);
524 if (java_fds != nullptr) {
525 ScopedIntArrayRO ar(env, java_fds);
526 if (ar.get() == nullptr) {
527 *error_msg = "Bad fd array";
528 return false;
529 }
530 fds->reserve(ar.size());
531 for (size_t i = 0; i < ar.size(); ++i) {
532 fds->push_back(ar[i]);
533 }
534 }
535 return true;
536 }
537
538 // Utility routine to fork zygote and specialize the child process.
ForkAndSpecializeCommon(JNIEnv * env,uid_t uid,gid_t gid,jintArray javaGids,jint runtime_flags,jobjectArray javaRlimits,jlong permittedCapabilities,jlong effectiveCapabilities,jint mount_external,jstring java_se_info,jstring java_se_name,bool is_system_server,jintArray fdsToClose,jintArray fdsToIgnore,bool is_child_zygote,jstring instructionSet,jstring dataDir)539 static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
540 jint runtime_flags, jobjectArray javaRlimits,
541 jlong permittedCapabilities, jlong effectiveCapabilities,
542 jint mount_external,
543 jstring java_se_info, jstring java_se_name,
544 bool is_system_server, jintArray fdsToClose,
545 jintArray fdsToIgnore, bool is_child_zygote,
546 jstring instructionSet, jstring dataDir) {
547 SetSignalHandlers();
548
549 sigset_t sigchld;
550 sigemptyset(&sigchld);
551 sigaddset(&sigchld, SIGCHLD);
552
553 auto fail_fn = [env, java_se_name, is_system_server](const std::string& msg)
554 __attribute__ ((noreturn)) {
555 const char* se_name_c_str = nullptr;
556 std::unique_ptr<ScopedUtfChars> se_name;
557 if (java_se_name != nullptr) {
558 se_name.reset(new ScopedUtfChars(env, java_se_name));
559 se_name_c_str = se_name->c_str();
560 }
561 if (se_name_c_str == nullptr && is_system_server) {
562 se_name_c_str = "system_server";
563 }
564 const std::string& error_msg = (se_name_c_str == nullptr)
565 ? msg
566 : StringPrintf("(%s) %s", se_name_c_str, msg.c_str());
567 env->FatalError(error_msg.c_str());
568 __builtin_unreachable();
569 };
570
571 // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
572 // log, which would result in the logging FDs we close being reopened.
573 // This would cause failures because the FDs are not whitelisted.
574 //
575 // Note that the zygote process is single threaded at this point.
576 if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) {
577 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
578 }
579
580 // Close any logging related FDs before we start evaluating the list of
581 // file descriptors.
582 __android_log_close();
583
584 std::string error_msg;
585
586 // If this is the first fork for this zygote, create the open FD table.
587 // If it isn't, we just need to check whether the list of open files has
588 // changed (and it shouldn't in the normal case).
589 std::vector<int> fds_to_ignore;
590 if (!FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore, &error_msg)) {
591 fail_fn(error_msg);
592 }
593 if (gOpenFdTable == NULL) {
594 gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, &error_msg);
595 if (gOpenFdTable == NULL) {
596 fail_fn(error_msg);
597 }
598 } else if (!gOpenFdTable->Restat(fds_to_ignore, &error_msg)) {
599 fail_fn(error_msg);
600 }
601
602 pid_t pid = fork();
603
604 if (pid == 0) {
605 PreApplicationInit();
606
607 // Clean up any descriptors which must be closed immediately
608 if (!DetachDescriptors(env, fdsToClose, &error_msg)) {
609 fail_fn(error_msg);
610 }
611
612 // Re-open all remaining open file descriptors so that they aren't shared
613 // with the zygote across a fork.
614 if (!gOpenFdTable->ReopenOrDetach(&error_msg)) {
615 fail_fn(error_msg);
616 }
617
618 if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
619 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
620 }
621
622 // Keep capabilities across UID change, unless we're staying root.
623 if (uid != 0) {
624 if (!EnableKeepCapabilities(&error_msg)) {
625 fail_fn(error_msg);
626 }
627 }
628
629 if (!SetInheritable(permittedCapabilities, &error_msg)) {
630 fail_fn(error_msg);
631 }
632 if (!DropCapabilitiesBoundingSet(&error_msg)) {
633 fail_fn(error_msg);
634 }
635
636 bool use_native_bridge = !is_system_server && (instructionSet != NULL)
637 && android::NativeBridgeAvailable();
638 if (use_native_bridge) {
639 ScopedUtfChars isa_string(env, instructionSet);
640 use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
641 }
642 if (use_native_bridge && dataDir == NULL) {
643 // dataDir should never be null if we need to use a native bridge.
644 // In general, dataDir will never be null for normal applications. It can only happen in
645 // special cases (for isolated processes which are not associated with any app). These are
646 // launched by the framework and should not be emulated anyway.
647 use_native_bridge = false;
648 ALOGW("Native bridge will not be used because dataDir == NULL.");
649 }
650
651 if (!MountEmulatedStorage(uid, mount_external, use_native_bridge, &error_msg)) {
652 ALOGW("Failed to mount emulated storage: %s (%s)", error_msg.c_str(), strerror(errno));
653 if (errno == ENOTCONN || errno == EROFS) {
654 // When device is actively encrypting, we get ENOTCONN here
655 // since FUSE was mounted before the framework restarted.
656 // When encrypted device is booting, we get EROFS since
657 // FUSE hasn't been created yet by init.
658 // In either case, continue without external storage.
659 } else {
660 fail_fn(error_msg);
661 }
662 }
663
664 // If this zygote isn't root, it won't be able to create a process group,
665 // since the directory is owned by root.
666 if (!is_system_server && getuid() == 0) {
667 int rc = createProcessGroup(uid, getpid());
668 if (rc != 0) {
669 if (rc == -EROFS) {
670 ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
671 } else {
672 ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
673 }
674 }
675 }
676
677 std::string error_msg;
678 if (!SetGids(env, javaGids, &error_msg)) {
679 fail_fn(error_msg);
680 }
681
682 if (!SetRLimits(env, javaRlimits, &error_msg)) {
683 fail_fn(error_msg);
684 }
685
686 if (use_native_bridge) {
687 ScopedUtfChars isa_string(env, instructionSet);
688 ScopedUtfChars data_dir(env, dataDir);
689 android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
690 }
691
692 int rc = setresgid(gid, gid, gid);
693 if (rc == -1) {
694 fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
695 }
696
697 // Must be called when the new process still has CAP_SYS_ADMIN, in this case, before changing
698 // uid from 0, which clears capabilities. The other alternative is to call
699 // prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that breaks SELinux domain transition (see
700 // b/71859146). As the result, privileged syscalls used below still need to be accessible in
701 // app process.
702 SetUpSeccompFilter(uid);
703
704 rc = setresuid(uid, uid, uid);
705 if (rc == -1) {
706 fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
707 }
708
709 if (NeedsNoRandomizeWorkaround()) {
710 // Work around ARM kernel ASLR lossage (http://b/5817320).
711 int old_personality = personality(0xffffffff);
712 int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
713 if (new_personality == -1) {
714 ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
715 }
716 }
717
718 if (!SetCapabilities(permittedCapabilities, effectiveCapabilities, permittedCapabilities,
719 &error_msg)) {
720 fail_fn(error_msg);
721 }
722
723 if (!SetSchedulerPolicy(&error_msg)) {
724 fail_fn(error_msg);
725 }
726
727 const char* se_info_c_str = NULL;
728 ScopedUtfChars* se_info = NULL;
729 if (java_se_info != NULL) {
730 se_info = new ScopedUtfChars(env, java_se_info);
731 se_info_c_str = se_info->c_str();
732 if (se_info_c_str == NULL) {
733 fail_fn("se_info_c_str == NULL");
734 }
735 }
736 const char* se_name_c_str = NULL;
737 ScopedUtfChars* se_name = NULL;
738 if (java_se_name != NULL) {
739 se_name = new ScopedUtfChars(env, java_se_name);
740 se_name_c_str = se_name->c_str();
741 if (se_name_c_str == NULL) {
742 fail_fn("se_name_c_str == NULL");
743 }
744 }
745 rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
746 if (rc == -1) {
747 fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
748 is_system_server, se_info_c_str, se_name_c_str));
749 }
750
751 // Make it easier to debug audit logs by setting the main thread's name to the
752 // nice name rather than "app_process".
753 if (se_name_c_str == NULL && is_system_server) {
754 se_name_c_str = "system_server";
755 }
756 if (se_name_c_str != NULL) {
757 SetThreadName(se_name_c_str);
758 }
759
760 delete se_info;
761 delete se_name;
762
763 // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
764 UnsetChldSignalHandler();
765
766 env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
767 is_system_server, is_child_zygote, instructionSet);
768 if (env->ExceptionCheck()) {
769 fail_fn("Error calling post fork hooks.");
770 }
771 } else if (pid > 0) {
772 // the parent process
773
774 // We blocked SIGCHLD prior to a fork, we unblock it here.
775 if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
776 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
777 }
778 }
779 return pid;
780 }
781
GetEffectiveCapabilityMask(JNIEnv * env)782 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
783 __user_cap_header_struct capheader;
784 memset(&capheader, 0, sizeof(capheader));
785 capheader.version = _LINUX_CAPABILITY_VERSION_3;
786 capheader.pid = 0;
787
788 __user_cap_data_struct capdata[2];
789 if (capget(&capheader, &capdata[0]) == -1) {
790 ALOGE("capget failed: %s", strerror(errno));
791 RuntimeAbort(env, __LINE__, "capget failed");
792 }
793
794 return capdata[0].effective |
795 (static_cast<uint64_t>(capdata[1].effective) << 32);
796 }
797 } // anonymous namespace
798
799 namespace android {
800
com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv *,jclass)801 static void com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv*, jclass) {
802 // security_getenforce is not allowed on app process. Initialize and cache the value before
803 // zygote forks.
804 g_is_security_enforced = security_getenforce();
805 }
806
com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv *,jclass)807 static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
808 PreApplicationInit();
809 }
810
com_android_internal_os_Zygote_nativeForkAndSpecialize(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring se_name,jintArray fdsToClose,jintArray fdsToIgnore,jboolean is_child_zygote,jstring instructionSet,jstring appDataDir)811 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
812 JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
813 jint runtime_flags, jobjectArray rlimits,
814 jint mount_external, jstring se_info, jstring se_name,
815 jintArray fdsToClose, jintArray fdsToIgnore, jboolean is_child_zygote,
816 jstring instructionSet, jstring appDataDir) {
817 jlong capabilities = 0;
818
819 // Grant CAP_WAKE_ALARM to the Bluetooth process.
820 // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
821 // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for
822 // audio-related threads.
823 // TODO: consider making such functionality an RPC to netd.
824 if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
825 capabilities |= (1LL << CAP_WAKE_ALARM);
826 capabilities |= (1LL << CAP_NET_RAW);
827 capabilities |= (1LL << CAP_NET_BIND_SERVICE);
828 capabilities |= (1LL << CAP_SYS_NICE);
829 }
830
831 // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
832 bool gid_wakelock_found = false;
833 if (gid == AID_WAKELOCK) {
834 gid_wakelock_found = true;
835 } else if (gids != NULL) {
836 jsize gids_num = env->GetArrayLength(gids);
837 ScopedIntArrayRO ar(env, gids);
838 if (ar.get() == NULL) {
839 RuntimeAbort(env, __LINE__, "Bad gids array");
840 }
841 for (int i = 0; i < gids_num; i++) {
842 if (ar[i] == AID_WAKELOCK) {
843 gid_wakelock_found = true;
844 break;
845 }
846 }
847 }
848 if (gid_wakelock_found) {
849 capabilities |= (1LL << CAP_BLOCK_SUSPEND);
850 }
851
852 // If forking a child zygote process, that zygote will need to be able to change
853 // the UID and GID of processes it forks, as well as drop those capabilities.
854 if (is_child_zygote) {
855 capabilities |= (1LL << CAP_SETUID);
856 capabilities |= (1LL << CAP_SETGID);
857 capabilities |= (1LL << CAP_SETPCAP);
858 }
859
860 // Containers run without some capabilities, so drop any caps that are not
861 // available.
862 capabilities &= GetEffectiveCapabilityMask(env);
863
864 return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags,
865 rlimits, capabilities, capabilities, mount_external, se_info,
866 se_name, false, fdsToClose, fdsToIgnore, is_child_zygote == JNI_TRUE,
867 instructionSet, appDataDir);
868 }
869
com_android_internal_os_Zygote_nativeForkSystemServer(JNIEnv * env,jclass,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permittedCapabilities,jlong effectiveCapabilities)870 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
871 JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
872 jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities,
873 jlong effectiveCapabilities) {
874 pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
875 runtime_flags, rlimits,
876 permittedCapabilities, effectiveCapabilities,
877 MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
878 NULL, false, NULL, NULL);
879 if (pid > 0) {
880 // The zygote process checks whether the child process has died or not.
881 ALOGI("System server process %d has been created", pid);
882 gSystemServerPid = pid;
883 // There is a slight window that the system server process has crashed
884 // but it went unnoticed because we haven't published its pid yet. So
885 // we recheck here just to make sure that all is well.
886 int status;
887 if (waitpid(pid, &status, WNOHANG) == pid) {
888 ALOGE("System server process %d has died. Restarting Zygote!", pid);
889 RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
890 }
891
892 // Assign system_server to the correct memory cgroup.
893 // Not all devices mount /dev/memcg so check for the file first
894 // to avoid unnecessarily printing errors and denials in the logs.
895 if (!access("/dev/memcg/system/tasks", F_OK) &&
896 !WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) {
897 ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid);
898 }
899 }
900 return pid;
901 }
902
com_android_internal_os_Zygote_nativeAllowFileAcrossFork(JNIEnv * env,jclass,jstring path)903 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
904 JNIEnv* env, jclass, jstring path) {
905 ScopedUtfChars path_native(env, path);
906 const char* path_cstr = path_native.c_str();
907 if (!path_cstr) {
908 RuntimeAbort(env, __LINE__, "path_cstr == NULL");
909 }
910 FileDescriptorWhitelist::Get()->Allow(path_cstr);
911 }
912
com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv * env,jclass)913 static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
914 // Zygote process unmount root storage space initially before every child processes are forked.
915 // Every forked child processes (include SystemServer) only mount their own root storage space
916 // and no need unmount storage operation in MountEmulatedStorage method.
917 // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
918
919 // See storage config details at http://source.android.com/tech/storage/
920 // Create private mount namespace shared by all children
921 if (unshare(CLONE_NEWNS) == -1) {
922 RuntimeAbort(env, __LINE__, "Failed to unshare()");
923 return;
924 }
925
926 // Mark rootfs as being a slave so that changes from default
927 // namespace only flow into our children.
928 if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
929 RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
930 return;
931 }
932
933 // Create a staging tmpfs that is shared by our children; they will
934 // bind mount storage into their respective private namespaces, which
935 // are isolated from each other.
936 const char* target_base = getenv("EMULATED_STORAGE_TARGET");
937 if (target_base != nullptr) {
938 #define STRINGIFY_UID(x) __STRING(x)
939 if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
940 "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
941 ALOGE("Failed to mount tmpfs to %s", target_base);
942 RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
943 return;
944 }
945 #undef STRINGIFY_UID
946 }
947
948 UnmountTree("/storage");
949 }
950
951 static const JNINativeMethod gMethods[] = {
952 { "nativeSecurityInit", "()V",
953 (void *) com_android_internal_os_Zygote_nativeSecurityInit },
954 { "nativeForkAndSpecialize",
955 "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;)I",
956 (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
957 { "nativeForkSystemServer", "(II[II[[IJJ)I",
958 (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
959 { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
960 (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork },
961 { "nativeUnmountStorageOnInit", "()V",
962 (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit },
963 { "nativePreApplicationInit", "()V",
964 (void *) com_android_internal_os_Zygote_nativePreApplicationInit }
965 };
966
register_com_android_internal_os_Zygote(JNIEnv * env)967 int register_com_android_internal_os_Zygote(JNIEnv* env) {
968 gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
969 gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
970 "(IZZLjava/lang/String;)V");
971
972 return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
973 }
974 } // namespace android
975