1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox.md
6
7 #include "sandbox/linux/suid/common/sandbox.h"
8
9 #define _GNU_SOURCE
10 #include <asm/unistd.h>
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <limits.h>
14 #include <sched.h>
15 #include <signal.h>
16 #include <stdarg.h>
17 #include <stdbool.h>
18 #include <stddef.h>
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/prctl.h>
24 #include <sys/resource.h>
25 #include <sys/socket.h>
26 #include <sys/stat.h>
27 #include <sys/time.h>
28 #include <sys/types.h>
29 #include <sys/vfs.h>
30 #include <sys/wait.h>
31 #include <unistd.h>
32
33 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h"
34 #include "sandbox/linux/suid/process_util.h"
35
// Fallback values for the clone(2) namespace flags, used only when the
// included system headers do not already define them.
#ifndef CLONE_NEWPID
#define CLONE_NEWPID 0x20000000
#endif
#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x40000000
#endif

// Defined further down in this file; declared here because
// MoveToNewNamespaces() calls it first.
static bool DropRoot();

// Evaluates |x| again for as long as it fails with EINTR.
#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
46
static void FatalError(const char* msg, ...)
    __attribute__((noreturn, format(printf, 1, 2)));

// Prints the printf-style message |msg| to stderr, followed by ": " and the
// textual description of the errno value that was current when the function
// was entered, then terminates the process with status 1 via _exit().
// Never returns.
static void FatalError(const char* msg, ...) {
  // Snapshot errno before touching stdio: vfprintf() is allowed to modify it,
  // which would make the reported failure reason wrong.
  const int saved_errno = errno;

  va_list ap;
  va_start(ap, msg);
  vfprintf(stderr, msg, ap);
  va_end(ap);

  fprintf(stderr, ": %s\n", strerror(saved_errno));
  fflush(stderr);
  _exit(1);
}
60
// Signal handler: writes a short notice to file descriptor 2 and terminates
// the process with status 1. The signal number itself is not inspected.
static void ExitWithErrorSignalHandler(int signal) {
  const char notice[] = "\nThe setuid sandbox got signaled, exiting.\n";
  const size_t notice_len = sizeof(notice) - 1;

  // The result is captured only so the call is not flagged as ignoring its
  // return value; we are exiting either way.
  const ssize_t written = write(STDERR_FILENO, notice, notice_len);
  (void)written;

  _exit(1);
}
69
70 // We will chroot() to the helper's /proc/self directory. Anything there will
71 // not exist anymore if we make sure to wait() for the helper.
72 //
73 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
74 // even if the helper survives as a zombie.
75 //
76 // There is very little reason to use fdinfo/ instead of fd/ but we are
77 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
78 #define SAFE_DIR "/proc/self/fdinfo"
79 #define SAFE_DIR2 "/proc/self/fd"
80
SpawnChrootHelper()81 static bool SpawnChrootHelper() {
82 int sv[2];
83 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
84 perror("socketpair");
85 return false;
86 }
87
88 char* safedir = NULL;
89 struct stat sdir_stat;
90 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
91 safedir = SAFE_DIR;
92 } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
93 safedir = SAFE_DIR2;
94 } else {
95 fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
96 return false;
97 }
98
99 const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
100
101 if (pid == -1) {
102 perror("clone");
103 close(sv[0]);
104 close(sv[1]);
105 return false;
106 }
107
108 if (pid == 0) {
109 // We share our files structure with an untrusted process. As a security in
110 // depth measure, we make sure that we can't open anything by mistake.
111 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
112
113 const struct rlimit nofile = {0, 0};
114 if (setrlimit(RLIMIT_NOFILE, &nofile))
115 FatalError("Setting RLIMIT_NOFILE");
116
117 if (close(sv[1]))
118 FatalError("close");
119
120 // wait for message
121 char msg;
122 ssize_t bytes;
123 do {
124 bytes = read(sv[0], &msg, 1);
125 } while (bytes == -1 && errno == EINTR);
126
127 if (bytes == 0)
128 _exit(0);
129 if (bytes != 1)
130 FatalError("read");
131
132 // do chrooting
133 if (msg != kMsgChrootMe)
134 FatalError("Unknown message from sandboxed process");
135
136 // sanity check
137 if (chdir(safedir))
138 FatalError("Cannot chdir into /proc/ directory");
139
140 if (chroot(safedir))
141 FatalError("Cannot chroot into /proc/ directory");
142
143 if (chdir("/"))
144 FatalError("Cannot chdir to / after chroot");
145
146 const char reply = kMsgChrootSuccessful;
147 do {
148 bytes = write(sv[0], &reply, 1);
149 } while (bytes == -1 && errno == EINTR);
150
151 if (bytes != 1)
152 FatalError("Writing reply");
153
154 _exit(0);
155 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
156 // are chrooted there.
157 // Our (unprivileged) parent should not even be able to open "." or "/"
158 // since they would need to pass the ptrace() check. If our parent wait()
159 // for us, our root directory will completely disappear.
160 }
161
162 if (close(sv[0])) {
163 close(sv[1]);
164 perror("close");
165 return false;
166 }
167
168 // In the parent process, we install an environment variable containing the
169 // number of the file descriptor.
170 char desc_str[64];
171 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
172 if (printed < 0 || printed >= (int)sizeof(desc_str)) {
173 fprintf(stderr, "Failed to snprintf\n");
174 return false;
175 }
176
177 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
178 perror("setenv");
179 close(sv[1]);
180 return false;
181 }
182
183 // We also install an environment variable containing the pid of the child
184 char helper_pid_str[64];
185 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
186 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
187 fprintf(stderr, "Failed to snprintf\n");
188 return false;
189 }
190
191 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
192 perror("setenv");
193 close(sv[1]);
194 return false;
195 }
196
197 return true;
198 }
199
200 // Block until child_pid exits, then exit. Try to preserve the exit code.
WaitForChildAndExit(pid_t child_pid)201 static void WaitForChildAndExit(pid_t child_pid) {
202 int exit_code = -1;
203 siginfo_t reaped_child_info;
204
205 // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager
206 // when things are hanging.
207 // Here, the current process is going to waitid() and _exit(), so there is no
208 // point in generating a crash report. The child process is the one
209 // blocking us.
210 if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) {
211 FatalError("Failed to change signal handler");
212 }
213
214 int wait_ret =
215 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
216
217 if (!wait_ret && reaped_child_info.si_pid == child_pid) {
218 if (reaped_child_info.si_code == CLD_EXITED) {
219 exit_code = reaped_child_info.si_status;
220 } else {
221 // Exit with code 0 if the child got signaled.
222 exit_code = 0;
223 }
224 }
225 _exit(exit_code);
226 }
227
MoveToNewNamespaces()228 static bool MoveToNewNamespaces() {
229 // These are the sets of flags which we'll try, in order.
230 const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, };
231
232 // We need to close kZygoteIdFd before the child can continue. We use this
233 // socketpair to tell the child when to continue;
234 int sync_fds[2];
235 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
236 FatalError("Failed to create a socketpair");
237 }
238
239 for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
240 i++) {
241 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
242 const int clone_errno = errno;
243
244 if (pid > 0) {
245 if (!DropRoot()) {
246 FatalError("Could not drop privileges");
247 } else {
248 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
249 FatalError("Could not close socketpair");
250 // The kZygoteIdFd needs to be closed in the parent before
251 // Zygote gets started.
252 if (close(kZygoteIdFd))
253 FatalError("close");
254 // Tell our child to continue
255 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
256 FatalError("send");
257 if (close(sync_fds[1]))
258 FatalError("close");
259 // We want to keep a full process tree and we don't want our childs to
260 // be reparented to (the outer PID namespace) init. So we wait for it.
261 WaitForChildAndExit(pid);
262 }
263 // NOTREACHED
264 FatalError("Not reached");
265 }
266
267 if (pid == 0) {
268 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
269 FatalError("Could not close socketpair");
270
271 // Wait for the parent to confirm it closed kZygoteIdFd before we
272 // continue
273 char should_continue;
274 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
275 FatalError("Read on socketpair");
276 if (close(sync_fds[0]))
277 FatalError("close");
278
279 if (kCloneExtraFlags[i] & CLONE_NEWPID) {
280 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
281 } else {
282 unsetenv(kSandboxPIDNSEnvironmentVarName);
283 }
284
285 if (kCloneExtraFlags[i] & CLONE_NEWNET) {
286 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
287 } else {
288 unsetenv(kSandboxNETNSEnvironmentVarName);
289 }
290
291 break;
292 }
293
294 // If EINVAL then the system doesn't support the requested flags, so
295 // continue to try a different set.
296 // On any other errno value the system *does* support these flags but
297 // something went wrong, hence we bail with an error message rather then
298 // provide less security.
299 if (errno != EINVAL) {
300 fprintf(stderr, "Failed to move to new namespace:");
301 if (kCloneExtraFlags[i] & CLONE_NEWPID) {
302 fprintf(stderr, " PID namespaces supported,");
303 }
304 if (kCloneExtraFlags[i] & CLONE_NEWNET) {
305 fprintf(stderr, " Network namespace supported,");
306 }
307 fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno));
308 return false;
309 }
310 }
311
312 // If the system doesn't support NEWPID then we carry on anyway.
313 return true;
314 }
315
// Marks the process as non-dumpable, then sets the real, effective and saved
// group IDs to the real group ID and likewise for the user IDs.
// Returns true if every step succeeded; otherwise reports the failing step
// with perror() and returns false without attempting the remaining steps.
static bool DropRoot() {
  // Step 1: clear the dumpable flag and read it back to confirm.
  if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
    perror("prctl(PR_SET_DUMPABLE)");
    return false;
  }
  if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0) != 0) {
    perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
    return false;
  }

  // Step 2: groups first, while we still may change them.
  gid_t real_gid, effective_gid, saved_gid;
  if (getresgid(&real_gid, &effective_gid, &saved_gid) != 0) {
    perror("getresgid");
    return false;
  }
  if (setresgid(real_gid, real_gid, real_gid) != 0) {
    perror("setresgid");
    return false;
  }

  // Step 3: user IDs last.
  uid_t real_uid, effective_uid, saved_uid;
  if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) {
    perror("getresuid");
    return false;
  }
  if (setresuid(real_uid, real_uid, real_uid) != 0) {
    perror("setresuid");
    return false;
  }

  return true;
}
351
SetupChildEnvironment()352 static bool SetupChildEnvironment() {
353 unsigned i;
354
355 // ld.so may have cleared several environment variables because we are SUID.
356 // However, the child process might need them so zygote_host_linux.cc saves a
357 // copy in SANDBOX_$x. This is safe because we have dropped root by this
358 // point, so we can only exec a binary with the permissions of the user who
359 // ran us in the first place.
360
361 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
362 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
363 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
364 if (!saved_envvar)
365 return false;
366
367 const char* const value = getenv(saved_envvar);
368 if (value) {
369 setenv(envvar, value, 1 /* overwrite */);
370 unsetenv(saved_envvar);
371 }
372
373 free(saved_envvar);
374 }
375
376 return true;
377 }
378
CheckAndExportApiVersion()379 bool CheckAndExportApiVersion() {
380 // Check the environment to see if a specific API version was requested.
381 // assume version 0 if none.
382 int api_number = -1;
383 char* api_string = getenv(kSandboxEnvironmentApiRequest);
384 if (!api_string) {
385 api_number = 0;
386 } else {
387 errno = 0;
388 char* endptr = NULL;
389 long long_api_number = strtol(api_string, &endptr, 10);
390 if (!endptr || *endptr || errno != 0 || long_api_number < INT_MIN ||
391 long_api_number > INT_MAX) {
392 return false;
393 }
394 api_number = long_api_number;
395 }
396
397 // Warn only for now.
398 if (api_number != kSUIDSandboxApiNumber) {
399 fprintf(
400 stderr,
401 "The setuid sandbox provides API version %d, "
402 "but you need %d\n"
403 "Please read "
404 "https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox_development.md."
405 "\n\n",
406 kSUIDSandboxApiNumber,
407 api_number);
408 }
409
410 // Export our version so that the sandboxed process can verify it did not
411 // use an old sandbox.
412 char version_string[64];
413 snprintf(version_string, sizeof(version_string), "%d", kSUIDSandboxApiNumber);
414 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
415 perror("setenv");
416 return false;
417 }
418
419 return true;
420 }
421
main(int argc,char ** argv)422 int main(int argc, char** argv) {
423 if (argc <= 1) {
424 if (argc <= 0) {
425 return 1;
426 }
427
428 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
429 return 1;
430 }
431
432 // Allow someone to query our API version
433 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
434 printf("%d\n", kSUIDSandboxApiNumber);
435 return 0;
436 }
437
438 // We cannot adjust /proc/pid/oom_adj for sandboxed renderers
439 // because those files are owned by root. So we need a helper here.
440 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
441 char* endptr = NULL;
442 long score;
443 errno = 0;
444 unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
445 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
446 return 1;
447 pid_t pid = pid_ul;
448 endptr = NULL;
449 errno = 0;
450 score = strtol(argv[3], &endptr, 10);
451 if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr ||
452 errno != 0) {
453 return 1;
454 }
455 return AdjustOOMScore(pid, score);
456 }
457
458 // Protect the core setuid sandbox functionality with an API version
459 if (!CheckAndExportApiVersion()) {
460 return 1;
461 }
462
463 if (geteuid() != 0) {
464 fprintf(stderr,
465 "The setuid sandbox is not running as root. Common causes:\n"
466 " * An unprivileged process using ptrace on it, like a debugger.\n"
467 " * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n");
468 }
469
470 if (!MoveToNewNamespaces())
471 return 1;
472 if (!SpawnChrootHelper())
473 return 1;
474 if (!DropRoot())
475 return 1;
476 if (!SetupChildEnvironment())
477 return 1;
478
479 execv(argv[1], &argv[1]);
480 FatalError("execv failed");
481
482 return 1;
483 }
484