1 /* Copyright 2017 The ChromiumOS Authors
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include "system.h"
7
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <grp.h>
11 #include <net/if.h>
12 #include <pwd.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/ioctl.h>
17 #include <sys/prctl.h>
18 #include <sys/socket.h>
19 #include <sys/stat.h>
20 #include <sys/statvfs.h>
21 #include <unistd.h>
22
23 #include <linux/securebits.h>
24
25 #include "syscall_wrapper.h"
26 #include "util.h"
27
28 /*
29 * SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the
30 * definition if the securebits header doesn't provide it.
31 */
32 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE
33 #define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6))
34 #endif
35
36 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED
37 #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7))
38 #endif
39
/*
 * Assert the value of SECURE_ALL_BITS at compile-time.
 * Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3
 * added a new securebit.
 * When a new securebit is added to the headers, SECURE_ALL_BITS grows, and
 * setting the new mask on an older kernel fails with EPERM. The assert below
 * catches such a header change at build time rather than at run time.
 */
48 #if defined(__ANDROID__)
49 _Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
50 #endif
51
52 /* Used by lookup_(user|group) functions. */
53 #define MAX_PWENT_SZ (1 << 20)
54 #define MAX_GRENT_SZ (1 << 20)
55
/*
 * secure_noroot_set_and_locked: Checks whether |mask| contains both
 * SECBIT_NOROOT and SECBIT_NOROOT_LOCKED.
 * Returns 1 when both bits are set, 0 otherwise.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	const uint64_t required = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;

	return (mask & required) == required;
}
61
/*
 * lock_securebits: Sets and locks the process securebits through
 * prctl(PR_SET_SECUREBITS).
 *
 * |skip_mask| lists bits that must be left untouched. When
 * |require_keep_caps| is false, SECBIT_KEEP_CAPS is allowed to stay off if it
 * is already locked off.
 *
 * Returns 0 on success (including when no bits remain to be set) and -1 when
 * a prctl(2) call fails.
 */
int lock_securebits(uint64_t skip_mask, bool require_keep_caps)
{
	/* Start from "everything set and locked" and carve out exceptions. */
	unsigned long wanted = SECURE_ALL_BITS | SECURE_ALL_LOCKS;

	if (!require_keep_caps) {
		/*
		 * SECBIT_KEEP_CAPS is cleared automatically on execve(2), so
		 * a process that already has it locked (e.g. nested minijail
		 * usage) cannot set it again. Since the caller does not insist
		 * on it, leave it off in that situation.
		 */
		int current = prctl(PR_GET_SECUREBITS);
		if (current < 0) {
			pwarn("prctl(PR_GET_SECUREBITS) failed");
			return -1;
		}

		bool keep_caps_locked = (current & SECBIT_KEEP_CAPS_LOCKED) != 0;
		bool keep_caps_set = (current & SECBIT_KEEP_CAPS) != 0;
		if (keep_caps_locked && !keep_caps_set)
			wanted &= ~SECBIT_KEEP_CAPS;
	}

	/*
	 * Raising an ambient capability requires it to be in both the
	 * permitted and the inheritable set, and those sets are configured
	 * separately, so NO_CAP_AMBIENT_RAISE does not need to be locked here.
	 */
	wanted &= ~(SECBIT_NO_CAP_AMBIENT_RAISE |
		    SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED);

	/* Honour the caller's request to leave certain bits alone. */
	wanted &= ~skip_mask;

	if (wanted == 0) {
		warn("not locking any securebits");
		return 0;
	}

	if (prctl(PR_SET_SECUREBITS, wanted) < 0) {
		pwarn("prctl(PR_SET_SECUREBITS) failed");
		return -1;
	}
	return 0;
}
111
write_proc_file(pid_t pid,const char * content,const char * basename)112 int write_proc_file(pid_t pid, const char *content, const char *basename)
113 {
114 attribute_cleanup_fd int fd = -1;
115 int ret;
116 size_t sz, len;
117 ssize_t written;
118 char filename[32];
119
120 sz = sizeof(filename);
121 ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
122 if (ret < 0 || (size_t)ret >= sz) {
123 warn("failed to generate %s filename", basename);
124 return -1;
125 }
126
127 fd = open(filename, O_WRONLY | O_CLOEXEC);
128 if (fd < 0) {
129 pwarn("failed to open '%s'", filename);
130 return -errno;
131 }
132
133 len = strlen(content);
134 written = write(fd, content, len);
135 if (written < 0) {
136 pwarn("failed to write '%s'", filename);
137 return -errno;
138 }
139
140 if ((size_t)written < len) {
141 warn("failed to write %zu bytes to '%s'", len, filename);
142 return -1;
143 }
144 return 0;
145 }
146
/*
 * get_last_valid_cap: Returns the highest capability number known to the
 * running kernel.
 *
 * cap_valid() is deliberately not used: it only reflects the kernel headers
 * this code was *compiled* against, and the running kernel may know fewer
 * (older kernel) or more (newer kernel) capabilities.
 *
 * Normally the answer is read from /proc. On Android, not all processes are
 * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap', so the
 * value is found by probing with prctl(PR_CAPBSET_READ) instead.
 */
unsigned int get_last_valid_cap(void)
{
	unsigned int cap = 0;

	if (!is_android()) {
		static const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
		FILE *fp = fopen(cap_file, "re");
		if (!fp)
			pdie("fopen(%s)", cap_file);
		if (fscanf(fp, "%u", &cap) != 1)
			pdie("fscanf(%s)", cap_file);
		fclose(fp);
		return cap;
	}

	/* Walk upwards until prctl(2) rejects the capability number. */
	while (prctl(PR_CAPBSET_READ, cap, 0, 0, 0) >= 0)
		++cap;

	/* |cap| is the first failing value; step back to the last good one. */
	if (cap > 0)
		--cap;

	return cap;
}
180
/*
 * cap_ambient_supported: Probes for ambient capability support by querying
 * CAP_CHOWN with prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET).
 * Returns 1 when the prctl(2) call does not fail, 0 otherwise.
 */
int cap_ambient_supported(void)
{
	const int rc =
	    prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0);

	return rc >= 0;
}
186
config_net_loopback(void)187 int config_net_loopback(void)
188 {
189 const char ifname[] = "lo";
190 attribute_cleanup_fd int sock = -1;
191 struct ifreq ifr;
192
193 /* Make sure people don't try to add really long names. */
194 _Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
195
196 sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
197 if (sock < 0) {
198 pwarn("socket(AF_LOCAL) failed");
199 return -1;
200 }
201
202 /*
203 * Do the equiv of `ip link set up lo`. The kernel will assign
204 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
205 */
206 strcpy(ifr.ifr_name, ifname);
207 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
208 pwarn("ioctl(SIOCGIFFLAGS) failed");
209 return -1;
210 }
211
212 /* The kernel preserves ifr.ifr_name for use. */
213 ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
214 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
215 pwarn("ioctl(SIOCSIFFLAGS) failed");
216 return -1;
217 }
218
219 return 0;
220 }
221
/*
 * write_pid_to_path: Writes |pid| as a decimal number followed by a newline
 * to the file at |path|, opened with fopen(3) mode "we".
 *
 * Returns 0 on success, a negative errno value when fopen(3) or fclose(3)
 * fails, and -1 when fprintf(3) fails.
 */
int write_pid_to_path(pid_t pid, const char *path)
{
	int rc;
	FILE *fp = fopen(path, "we");

	if (!fp) {
		/* Capture errno before logging, which may overwrite it. */
		rc = errno;
		pwarn("failed to open '%s'", path);
		return -rc;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		/* fprintf(3) does not set errno on failure. */
		warn("fprintf(%s) failed", path);
		fclose(fp);
		return -1;
	}
	/* fclose(3) flushes the stream, so its result must be checked. */
	if (fclose(fp)) {
		/* Capture errno before logging, which may overwrite it. */
		rc = errno;
		pwarn("fclose(%s) failed", path);
		return -rc;
	}

	return 0;
}
243
/*
 * mkdir_p: Create the |path| directory and its parents (if need be) with
 * |mode|. If not |isdir|, then |path| is actually a file, so the last
 * component will not be created.
 *
 * Components that already exist (EEXIST) are not treated as errors.
 * Returns 0 on success or a negative errno value on failure.
 */
int mkdir_p(const char *path, mode_t mode, bool isdir)
{
	int rc;
	char *dir = strdup(path);
	if (!dir) {
		rc = errno;
		pwarn("strdup(%s) failed", path);
		return -rc;
	}

	/*
	 * Starting from the root, work our way out to the end. The scan
	 * begins at dir + 1 so a leading '/' is never cut off. For an empty
	 * |path| that address lies beyond the terminating NUL, so there is
	 * nothing to scan.
	 */
	char *p = dir[0] != '\0' ? strchr(dir + 1, '/') : NULL;
	while (p) {
		/* Temporarily terminate the string at this separator. */
		*p = '\0';
		if (mkdir(dir, mode) && errno != EEXIST) {
			rc = errno;
			pwarn("mkdir(%s, 0%o) failed", dir, mode);
			free(dir);
			return -rc;
		}
		*p = '/';
		p = strchr(p + 1, '/');
	}

	/*
	 * Create the last directory. We still check EEXIST here in case
	 * of trailing slashes.
	 */
	free(dir);
	if (isdir && mkdir(path, mode) && errno != EEXIST) {
		rc = errno;
		pwarn("mkdir(%s, 0%o) failed", path, mode);
		return -rc;
	}
	return 0;
}
285
/*
 * get_mount_flags: Obtain the mount flags of the mount where |source| lives.
 *
 * The flags come from statvfs(3) and are stored in |*mnt_flags|. A NULL
 * |mnt_flags| makes this a no-op that returns 0.
 * Returns 0 on success or a negative errno value if statvfs(3) fails.
 */
int get_mount_flags(const char *source, unsigned long *mnt_flags)
{
	struct statvfs vfs;

	/* The caller is not interested in the flags; skip the lookup. */
	if (!mnt_flags)
		return 0;

	if (statvfs(source, &vfs) != 0) {
		const int saved_errno = errno;
		pwarn("failed to look up mount flags: source=%s", source);
		return -saved_errno;
	}

	*mnt_flags = vfs.f_flag;
	return 0;
}
304
305 /*
306 * setup_mount_destination: Ensures the mount target exists.
307 * Creates it if needed and possible.
308 */
setup_mount_destination(const char * source,const char * dest,uid_t uid,uid_t gid,bool bind)309 int setup_mount_destination(const char *source, const char *dest, uid_t uid,
310 uid_t gid, bool bind)
311 {
312 int rc;
313 struct stat st_buf;
314 bool domkdir;
315
316 rc = stat(dest, &st_buf);
317 if (rc == 0) /* destination exists */
318 return 0;
319
320 /*
321 * Try to create the destination.
322 * Either make a directory or touch a file depending on the source type.
323 *
324 * If the source isn't an absolute path, assume it is a filesystem type
325 * such as "tmpfs" and create a directory to mount it on. The dest will
326 * be something like "none" or "proc" which we shouldn't be checking.
327 */
328 if (source[0] == '/') {
329 /* The source is an absolute path -- it better exist! */
330 rc = stat(source, &st_buf);
331 if (rc) {
332 rc = errno;
333 pwarn("stat(%s) failed", source);
334 return -rc;
335 }
336
337 /*
338 * If bind mounting, we only create a directory if the source
339 * is a directory, else we always bind mount it as a file to
340 * support device nodes, sockets, etc...
341 *
342 * For all other mounts, we assume a block/char source is
343 * going to want a directory to mount to. If the source is
344 * something else (e.g. a fifo or socket), this probably will
345 * not do the right thing, but we'll fail later on when we try
346 * to mount(), so shouldn't be a big deal.
347 */
348 domkdir = S_ISDIR(st_buf.st_mode) ||
349 (!bind && (S_ISBLK(st_buf.st_mode) ||
350 S_ISCHR(st_buf.st_mode)));
351 } else {
352 /* The source is a relative path -- assume it's a pseudo fs. */
353
354 /* Disallow relative bind mounts. */
355 if (bind) {
356 warn("relative bind-mounts are not allowed: source=%s",
357 source);
358 return -EINVAL;
359 }
360
361 domkdir = true;
362 }
363
364 /*
365 * Now that we know what we want to do, do it!
366 * We always create the intermediate dirs and the final path with 0755
367 * perms and root/root ownership. This shouldn't be a problem because
368 * the actual mount will set those perms/ownership on the mount point
369 * which is all people should need to access it.
370 */
371 rc = mkdir_p(dest, 0755, domkdir);
372 if (rc)
373 return rc;
374 if (!domkdir) {
375 attribute_cleanup_fd int fd =
376 open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700);
377 if (fd < 0) {
378 rc = errno;
379 pwarn("open(%s) failed", dest);
380 return -rc;
381 }
382 }
383 if (chown(dest, uid, gid)) {
384 rc = errno;
385 pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
386 return -rc;
387 }
388 return 0;
389 }
390
391 /*
392 * lookup_user: Gets the uid/gid for the given username.
393 */
lookup_user(const char * user,uid_t * uid,gid_t * gid)394 int lookup_user(const char *user, uid_t *uid, gid_t *gid)
395 {
396 char *buf = NULL;
397 struct passwd pw;
398 struct passwd *ppw = NULL;
399 /*
400 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
401 * a suggested starting size for the buffer, so let's try getting this
402 * size first, and fallback to a default othersise.
403 */
404 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
405 if (sz == -1)
406 sz = 65536; /* your guess is as good as mine... */
407
408 do {
409 buf = malloc(sz);
410 if (!buf)
411 return -ENOMEM;
412 int err = getpwnam_r(user, &pw, buf, sz, &ppw);
413 /*
414 * We're safe to free the buffer here. The strings inside |pw|
415 * point inside |buf|, but we don't use any of them; this leaves
416 * the pointers dangling but it's safe.
417 * |ppw| points at |pw| if getpwnam_r(3) succeeded.
418 */
419 free(buf);
420 if (err == ERANGE) {
421 /* |buf| was too small, retry with a bigger one. */
422 sz <<= 1;
423 } else if (err != 0) {
424 /* We got an error not related to the size of |buf|. */
425 return -err;
426 } else if (!ppw) {
427 /* Not found. */
428 return -ENOENT;
429 } else {
430 *uid = ppw->pw_uid;
431 *gid = ppw->pw_gid;
432 return 0;
433 }
434 } while (sz <= MAX_PWENT_SZ);
435
436 /* A buffer of size MAX_PWENT_SZ is still too small, return an error. */
437 return -ERANGE;
438 }
439
440 /*
441 * lookup_group: Gets the gid for the given group name.
442 */
lookup_group(const char * group,gid_t * gid)443 int lookup_group(const char *group, gid_t *gid)
444 {
445 char *buf = NULL;
446 struct group gr;
447 struct group *pgr = NULL;
448 /*
449 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
450 * a suggested starting size for the buffer, so let's try getting this
451 * size first, and fallback to a default otherwise.
452 */
453 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
454 if (sz == -1)
455 sz = 65536; /* and mine is as good as yours, really */
456
457 do {
458 buf = malloc(sz);
459 if (!buf)
460 return -ENOMEM;
461 int err = getgrnam_r(group, &gr, buf, sz, &pgr);
462 /*
463 * We're safe to free the buffer here. The strings inside |gr|
464 * point inside |buf|, but we don't use any of them; this leaves
465 * the pointers dangling but it's safe.
466 * |pgr| points at |gr| if getgrnam_r(3) succeeded.
467 */
468 free(buf);
469 if (err == ERANGE) {
470 /* |buf| was too small, retry with a bigger one. */
471 sz <<= 1;
472 } else if (err != 0) {
473 /* We got an error not related to the size of |buf|. */
474 return -err;
475 } else if (!pgr) {
476 /* Not found. */
477 return -ENOENT;
478 } else {
479 *gid = pgr->gr_gid;
480 return 0;
481 }
482 } while (sz <= MAX_GRENT_SZ);
483
484 /* A buffer of size MAX_GRENT_SZ is still too small, return an error. */
485 return -ERANGE;
486 }
487
seccomp_action_is_available(const char * wanted)488 static bool seccomp_action_is_available(const char *wanted)
489 {
490 if (is_android()) {
491 /*
492 * Accessing |actions_avail| is generating SELinux denials, so
493 * skip for now.
494 * TODO(crbug.com/978022, jorgelo): Remove once the denial is
495 * fixed.
496 */
497 return false;
498 }
499 const char actions_avail_path[] =
500 "/proc/sys/kernel/seccomp/actions_avail";
501 FILE *f = fopen(actions_avail_path, "re");
502
503 if (!f) {
504 pwarn("fopen(%s) failed", actions_avail_path);
505 return false;
506 }
507
508 attribute_cleanup_str char *actions_avail = NULL;
509 size_t buf_size = 0;
510 if (getline(&actions_avail, &buf_size, f) < 0) {
511 pwarn("getline() failed");
512 return false;
513 }
514
515 /*
516 * This is just substring search, which means that partial matches will
517 * match too (e.g. "action" would match "longaction"). There are no
518 * seccomp actions which include other actions though, so we're good for
519 * now. Eventually we might want to split the string by spaces.
520 */
521 return strstr(actions_avail, wanted) != NULL;
522 }
523
/*
 * seccomp_ret_log_available: Reports whether the "log" seccomp action is
 * available. The lookup is performed on the first call and the answer is
 * cached in a function-local static for later calls.
 */
int seccomp_ret_log_available(void)
{
	/* -1 means "not looked up yet". */
	static int cached = -1;

	if (cached != -1)
		return cached;

	cached = seccomp_action_is_available("log");
	return cached;
}
533
/*
 * seccomp_ret_kill_process_available: Reports whether the "kill_process"
 * seccomp action is available. The lookup is performed on the first call and
 * the answer is cached in a function-local static for later calls.
 */
int seccomp_ret_kill_process_available(void)
{
	/* -1 means "not looked up yet". */
	static int cached = -1;

	if (cached != -1)
		return cached;

	cached = seccomp_action_is_available("kill_process");
	return cached;
}
544
seccomp_filter_flags_available(unsigned int flags)545 bool seccomp_filter_flags_available(unsigned int flags)
546 {
547 return sys_seccomp(SECCOMP_SET_MODE_FILTER, flags, NULL) != -1 ||
548 errno != EINVAL;
549 }
550
is_canonical_path(const char * path)551 bool is_canonical_path(const char *path)
552 {
553 attribute_cleanup_str char *rp = realpath(path, NULL);
554 if (!rp) {
555 return false;
556 }
557
558 if (streq(path, rp)) {
559 return true;
560 }
561
562 size_t path_len = strlen(path);
563 size_t rp_len = strlen(rp);
564 /* If |path| has a single trailing slash, that's OK. */
565 return path_len == rp_len + 1 && strncmp(path, rp, rp_len) == 0 &&
566 path[path_len - 1] == '/';
567 }
568