• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 The ChromiumOS Authors
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include "system.h"
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <grp.h>
11 #include <net/if.h>
12 #include <pwd.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/ioctl.h>
17 #include <sys/prctl.h>
18 #include <sys/socket.h>
19 #include <sys/stat.h>
20 #include <sys/statvfs.h>
21 #include <unistd.h>
22 
23 #include <linux/securebits.h>
24 
25 #include "syscall_wrapper.h"
26 #include "util.h"
27 
28 /*
29  * SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the
30  * definition if the securebits header doesn't provide it.
31  */
32 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE
33 #define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6))
34 #endif
35 
36 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED
37 #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7))
38 #endif
39 
40 /*
41  * Assert the value of SECURE_ALL_BITS at compile-time.
42  * Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3
43  * added a new securebit.
44  * When a new securebit is added, the new SECURE_ALL_BITS mask will return EPERM
45  * when used on older kernels. The compile-time assert will catch this situation
46  * at compile time.
47  */
48 #if defined(__ANDROID__)
49 _Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
50 #endif
51 
52 /* Used by lookup_(user|group) functions. */
53 #define MAX_PWENT_SZ (1 << 20)
54 #define MAX_GRENT_SZ (1 << 20)
55 
/*
 * secure_noroot_set_and_locked: Reports whether |mask| has both the
 * SECBIT_NOROOT bit and its SECBIT_NOROOT_LOCKED companion set.
 *
 * Returns 1 when both bits are present, 0 otherwise.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	const uint64_t wanted = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;

	return (mask & wanted) == wanted;
}
61 
/*
 * lock_securebits: Sets and locks the process securebits via
 * prctl(PR_SET_SECUREBITS).
 *
 * |skip_mask| lists bits that must not be touched. When |require_keep_caps|
 * is false, SECBIT_KEEP_CAPS is left cleared if the current process already
 * has it locked in the "off" state.
 *
 * Returns 0 on success (including when nothing is left to set) and -1 if
 * either prctl(2) call fails.
 */
int lock_securebits(uint64_t skip_mask, bool require_keep_caps)
{
	/* Start from "everything set and locked", then carve out exceptions. */
	unsigned long bits = SECURE_ALL_BITS | SECURE_ALL_LOCKS;

	/*
	 * SECBIT_KEEP_CAPS is automatically cleared on execve(2), so a process
	 * that already has it locked (e.g. nested minijail usage) cannot set
	 * it again. Unless the caller insists on it, tolerate that state by
	 * not asking for the bit.
	 */
	if (!require_keep_caps) {
		int cur = prctl(PR_GET_SECUREBITS);
		if (cur < 0) {
			pwarn("prctl(PR_GET_SECUREBITS) failed");
			return -1;
		}

		bool keep_caps_locked = (cur & SECBIT_KEEP_CAPS_LOCKED) != 0;
		bool keep_caps_set = (cur & SECBIT_KEEP_CAPS) != 0;
		if (keep_caps_locked && !keep_caps_set)
			bits &= ~SECBIT_KEEP_CAPS;
	}

	/*
	 * Ambient capabilities can only be raised when they are already in the
	 * permitted *and* inheritable sets, which are configured separately,
	 * so the NO_CAP_AMBIENT_RAISE securebit does not need to be locked.
	 */
	bits &= ~(SECBIT_NO_CAP_AMBIENT_RAISE |
		  SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED);

	/* Honor the caller's request to leave certain bits alone. */
	bits = bits & ~skip_mask;

	if (bits == 0) {
		warn("not locking any securebits");
		return 0;
	}

	if (prctl(PR_SET_SECUREBITS, bits) < 0) {
		pwarn("prctl(PR_SET_SECUREBITS) failed");
		return -1;
	}

	return 0;
}
111 
write_proc_file(pid_t pid,const char * content,const char * basename)112 int write_proc_file(pid_t pid, const char *content, const char *basename)
113 {
114 	attribute_cleanup_fd int fd = -1;
115 	int ret;
116 	size_t sz, len;
117 	ssize_t written;
118 	char filename[32];
119 
120 	sz = sizeof(filename);
121 	ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
122 	if (ret < 0 || (size_t)ret >= sz) {
123 		warn("failed to generate %s filename", basename);
124 		return -1;
125 	}
126 
127 	fd = open(filename, O_WRONLY | O_CLOEXEC);
128 	if (fd < 0) {
129 		pwarn("failed to open '%s'", filename);
130 		return -errno;
131 	}
132 
133 	len = strlen(content);
134 	written = write(fd, content, len);
135 	if (written < 0) {
136 		pwarn("failed to write '%s'", filename);
137 		return -errno;
138 	}
139 
140 	if ((size_t)written < len) {
141 		warn("failed to write %zu bytes to '%s'", len, filename);
142 		return -1;
143 	}
144 	return 0;
145 }
146 
/*
 * get_last_valid_cap: Returns the highest capability number the running
 * kernel knows about.
 *
 * cap_valid() is deliberately not used: it only reflects the kernel headers
 * we were *compiled* against, while the kernel we run on may be older (fewer
 * caps) or newer (more caps).
 *
 * The value is normally read from /proc. On Android not every process is
 * guaranteed access to '/proc/sys/kernel/cap_last_cap', so the value is
 * probed there by calling prctl(PR_CAPBSET_READ) until it fails.
 *
 * Dies (via pdie) if the value cannot be obtained or is larger than 64.
 */
unsigned int get_last_valid_cap(void)
{
	unsigned int cap = 0;

	if (!is_android()) {
		static const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
		FILE *fp = fopen(cap_file, "re");
		if (!fp)
			pdie("fopen(%s)", cap_file);
		if (fscanf(fp, "%u", &cap) != 1)
			pdie("fscanf(%s)", cap_file);
		fclose(fp);
	} else {
		/* Walk upwards until the kernel rejects the cap number. */
		while (prctl(PR_CAPBSET_READ, cap, 0, 0, 0) >= 0)
			++cap;

		/* |cap| is now the first failing value; step back by one. */
		if (cap > 0)
			cap--;
	}

	/*
	 * Caps are bitfields stored in 64-bit int.
	 * NOTE(review): a value of exactly 64 passes this check; confirm that
	 * callers bound their bit shifts accordingly.
	 */
	if (cap > 64)
		pdie("unable to detect last valid cap: %u > 64", cap);
	return cap;
}
184 
/*
 * cap_ambient_supported: Probes for ambient capability support by querying
 * whether CAP_CHOWN is in the ambient set.
 *
 * Returns 1 if the prctl(PR_CAP_AMBIENT) query succeeds, 0 if it fails.
 */
int cap_ambient_supported(void)
{
	int rc = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0);

	return rc >= 0;
}
190 
config_net_loopback(void)191 int config_net_loopback(void)
192 {
193 	const char ifname[] = "lo";
194 	attribute_cleanup_fd int sock = -1;
195 	struct ifreq ifr;
196 
197 	/* Make sure people don't try to add really long names. */
198 	_Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
199 
200 	sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
201 	if (sock < 0) {
202 		pwarn("socket(AF_LOCAL) failed");
203 		return -1;
204 	}
205 
206 	/*
207 	 * Do the equiv of `ip link set up lo`.  The kernel will assign
208 	 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
209 	 */
210 	strcpy(ifr.ifr_name, ifname);
211 	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
212 		pwarn("ioctl(SIOCGIFFLAGS) failed");
213 		return -1;
214 	}
215 
216 	/* The kernel preserves ifr.ifr_name for use. */
217 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
218 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
219 		pwarn("ioctl(SIOCSIFFLAGS) failed");
220 		return -1;
221 	}
222 
223 	return 0;
224 }
225 
/*
 * write_pid_to_path: Writes |pid| as a decimal number followed by a newline
 * to the file at |path|, opened with mode "we".
 *
 * Returns 0 on success, -errno if fopen(3) or fclose(3) fails, and -1 if
 * fprintf(3) fails.
 */
int write_pid_to_path(pid_t pid, const char *path)
{
	int rc;
	FILE *fp = fopen(path, "we");

	if (!fp) {
		/*
		 * Capture errno before logging: the logging call may modify
		 * it, which could turn the return value into the wrong code
		 * (or even 0).
		 */
		rc = errno;
		pwarn("failed to open '%s'", path);
		return -rc;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		/* fprintf(3) does not set errno on failure. */
		warn("fprintf(%s) failed", path);
		fclose(fp);
		return -1;
	}
	/* fclose(3) flushes, so its result decides whether the write landed. */
	if (fclose(fp)) {
		rc = errno;
		pwarn("fclose(%s) failed", path);
		return -rc;
	}

	return 0;
}
247 
/*
 * mkdir_p: Create the |path| directory and its parents (if need be) with
 * |mode|.
 *
 * If not |isdir|, then |path| is actually a file, so the last component
 * will not be created.
 *
 * Components that already exist (EEXIST) are not treated as errors.
 * Returns 0 on success or a negative errno value on failure.
 */
int mkdir_p(const char *path, mode_t mode, bool isdir)
{
	int rc;
	char *dir = strdup(path);
	if (!dir) {
		rc = errno;
		pwarn("strdup(%s) failed", path);
		return -rc;
	}

	/*
	 * Starting from the root, work our way out to the end.  The search
	 * begins at the second byte so a leading '/' is not taken as a
	 * component boundary.  An empty |path| has no second byte, so skip the
	 * search entirely rather than reading past the end of |dir|.
	 */
	char *p = dir[0] != '\0' ? strchr(dir + 1, '/') : NULL;
	while (p) {
		/* Temporarily terminate the string at this separator. */
		*p = '\0';
		if (mkdir(dir, mode) && errno != EEXIST) {
			rc = errno;
			pwarn("mkdir(%s, 0%o) failed", dir, mode);
			free(dir);
			return -rc;
		}
		*p = '/';
		p = strchr(p + 1, '/');
	}

	/*
	 * Create the last directory.  We still check EEXIST here in case
	 * of trailing slashes.
	 */
	free(dir);
	if (isdir && mkdir(path, mode) && errno != EEXIST) {
		rc = errno;
		pwarn("mkdir(%s, 0%o) failed", path, mode);
		return -rc;
	}
	return 0;
}
289 
/*
 * get_mount_flags: Obtain the mount flags of the mount where |source| lives.
 *
 * The flags come from statvfs(3) and are stored in |*mnt_flags|. When
 * |mnt_flags| is NULL nothing is looked up.
 *
 * Returns 0 on success or a negative errno value if statvfs(3) fails.
 */
int get_mount_flags(const char *source, unsigned long *mnt_flags)
{
	struct statvfs vfs;

	/* Caller is not interested in the flags: nothing to do. */
	if (!mnt_flags)
		return 0;

	if (statvfs(source, &vfs) != 0) {
		int saved_errno = errno;
		pwarn("failed to look up mount flags: source=%s", source);
		return -saved_errno;
	}

	*mnt_flags = vfs.f_flag;
	return 0;
}
308 
309 /*
310  * setup_mount_destination: Ensures the mount target exists.
311  * Creates it if needed and possible.
312  */
setup_mount_destination(const char * source,const char * dest,uid_t uid,uid_t gid,bool bind)313 int setup_mount_destination(const char *source, const char *dest, uid_t uid,
314 			    uid_t gid, bool bind)
315 {
316 	int rc;
317 	struct stat st_buf;
318 	bool domkdir;
319 
320 	rc = stat(dest, &st_buf);
321 	if (rc == 0) /* destination exists */
322 		return 0;
323 
324 	/*
325 	 * Try to create the destination.
326 	 * Either make a directory or touch a file depending on the source type.
327 	 *
328 	 * If the source isn't an absolute path, assume it is a filesystem type
329 	 * such as "tmpfs" and create a directory to mount it on.  The dest will
330 	 * be something like "none" or "proc" which we shouldn't be checking.
331 	 */
332 	if (source[0] == '/') {
333 		/* The source is an absolute path -- it better exist! */
334 		rc = stat(source, &st_buf);
335 		if (rc) {
336 			rc = errno;
337 			pwarn("stat(%s) failed", source);
338 			return -rc;
339 		}
340 
341 		/*
342 		 * If bind mounting, we only create a directory if the source
343 		 * is a directory, else we always bind mount it as a file to
344 		 * support device nodes, sockets, etc...
345 		 *
346 		 * For all other mounts, we assume a block/char source is
347 		 * going to want a directory to mount to.  If the source is
348 		 * something else (e.g. a fifo or socket), this probably will
349 		 * not do the right thing, but we'll fail later on when we try
350 		 * to mount(), so shouldn't be a big deal.
351 		 */
352 		domkdir = S_ISDIR(st_buf.st_mode) ||
353 			  (!bind && (S_ISBLK(st_buf.st_mode) ||
354 				     S_ISCHR(st_buf.st_mode)));
355 	} else {
356 		/* The source is a relative path -- assume it's a pseudo fs. */
357 
358 		/* Disallow relative bind mounts. */
359 		if (bind) {
360 			warn("relative bind-mounts are not allowed: source=%s",
361 			     source);
362 			return -EINVAL;
363 		}
364 
365 		domkdir = true;
366 	}
367 
368 	/*
369 	 * Now that we know what we want to do, do it!
370 	 * We always create the intermediate dirs and the final path with 0755
371 	 * perms and root/root ownership.  This shouldn't be a problem because
372 	 * the actual mount will set those perms/ownership on the mount point
373 	 * which is all people should need to access it.
374 	 */
375 	rc = mkdir_p(dest, 0755, domkdir);
376 	if (rc)
377 		return rc;
378 	if (!domkdir) {
379 		attribute_cleanup_fd int fd =
380 		    open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700);
381 		if (fd < 0) {
382 			rc = errno;
383 			pwarn("open(%s) failed", dest);
384 			return -rc;
385 		}
386 	}
387 	if (chown(dest, uid, gid)) {
388 		rc = errno;
389 		pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
390 		return -rc;
391 	}
392 	return 0;
393 }
394 
395 /*
396  * lookup_user: Gets the uid/gid for the given username.
397  */
lookup_user(const char * user,uid_t * uid,gid_t * gid)398 int lookup_user(const char *user, uid_t *uid, gid_t *gid)
399 {
400 	char *buf = NULL;
401 	struct passwd pw;
402 	struct passwd *ppw = NULL;
403 	/*
404 	 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
405 	 * a suggested starting size for the buffer, so let's try getting this
406 	 * size first, and fallback to a default othersise.
407 	 */
408 	ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
409 	if (sz == -1)
410 		sz = 65536; /* your guess is as good as mine... */
411 
412 	do {
413 		buf = malloc(sz);
414 		if (!buf)
415 			return -ENOMEM;
416 		int err = getpwnam_r(user, &pw, buf, sz, &ppw);
417 		/*
418 		 * We're safe to free the buffer here. The strings inside |pw|
419 		 * point inside |buf|, but we don't use any of them; this leaves
420 		 * the pointers dangling but it's safe.
421 		 * |ppw| points at |pw| if getpwnam_r(3) succeeded.
422 		 */
423 		free(buf);
424 		if (err == ERANGE) {
425 			/* |buf| was too small, retry with a bigger one. */
426 			sz <<= 1;
427 		} else if (err != 0) {
428 			/* We got an error not related to the size of |buf|. */
429 			return -err;
430 		} else if (!ppw) {
431 			/* Not found. */
432 			return -ENOENT;
433 		} else {
434 			*uid = ppw->pw_uid;
435 			*gid = ppw->pw_gid;
436 			return 0;
437 		}
438 	} while (sz <= MAX_PWENT_SZ);
439 
440 	/* A buffer of size MAX_PWENT_SZ is still too small, return an error. */
441 	return -ERANGE;
442 }
443 
444 /*
445  * lookup_group: Gets the gid for the given group name.
446  */
lookup_group(const char * group,gid_t * gid)447 int lookup_group(const char *group, gid_t *gid)
448 {
449 	char *buf = NULL;
450 	struct group gr;
451 	struct group *pgr = NULL;
452 	/*
453 	 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
454 	 * a suggested starting size for the buffer, so let's try getting this
455 	 * size first, and fallback to a default otherwise.
456 	 */
457 	ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
458 	if (sz == -1)
459 		sz = 65536; /* and mine is as good as yours, really */
460 
461 	do {
462 		buf = malloc(sz);
463 		if (!buf)
464 			return -ENOMEM;
465 		int err = getgrnam_r(group, &gr, buf, sz, &pgr);
466 		/*
467 		 * We're safe to free the buffer here. The strings inside |gr|
468 		 * point inside |buf|, but we don't use any of them; this leaves
469 		 * the pointers dangling but it's safe.
470 		 * |pgr| points at |gr| if getgrnam_r(3) succeeded.
471 		 */
472 		free(buf);
473 		if (err == ERANGE) {
474 			/* |buf| was too small, retry with a bigger one. */
475 			sz <<= 1;
476 		} else if (err != 0) {
477 			/* We got an error not related to the size of |buf|. */
478 			return -err;
479 		} else if (!pgr) {
480 			/* Not found. */
481 			return -ENOENT;
482 		} else {
483 			*gid = pgr->gr_gid;
484 			return 0;
485 		}
486 	} while (sz <= MAX_GRENT_SZ);
487 
488 	/* A buffer of size MAX_GRENT_SZ is still too small, return an error. */
489 	return -ERANGE;
490 }
491 
seccomp_action_is_available(const char * wanted)492 static bool seccomp_action_is_available(const char *wanted)
493 {
494 	if (is_android()) {
495 		/*
496 		 * Accessing |actions_avail| is generating SELinux denials, so
497 		 * skip for now.
498 		 * TODO(crbug.com/978022, jorgelo): Remove once the denial is
499 		 * fixed.
500 		 */
501 		return false;
502 	}
503 	static const char actions_avail_path[] =
504 	    "/proc/sys/kernel/seccomp/actions_avail";
505 	attribute_cleanup_fp FILE *f = fopen(actions_avail_path, "re");
506 
507 	if (!f) {
508 		pwarn("fopen(%s) failed", actions_avail_path);
509 		return false;
510 	}
511 
512 	attribute_cleanup_str char *actions_avail = NULL;
513 	size_t buf_size = 0;
514 	if (getline(&actions_avail, &buf_size, f) < 0) {
515 		pwarn("getline() failed");
516 		return false;
517 	}
518 
519 	/*
520 	 * This is just substring search, which means that partial matches will
521 	 * match too (e.g. "action" would match "longaction"). There are no
522 	 * seccomp actions which include other actions though, so we're good for
523 	 * now. Eventually we might want to split the string by spaces.
524 	 */
525 	return strstr(actions_avail, wanted) != NULL;
526 }
527 
/*
 * seccomp_ret_log_available: Reports whether the "log" seccomp action is
 * listed as available. The answer is computed on the first call and cached
 * in a function-local static for later calls.
 */
int seccomp_ret_log_available(void)
{
	/* -1 means "not probed yet". */
	static int cached = -1;

	if (cached != -1)
		return cached;

	cached = seccomp_action_is_available("log");
	return cached;
}
537 
/*
 * seccomp_ret_kill_process_available: Reports whether the "kill_process"
 * seccomp action is listed as available. The answer is computed on the first
 * call and cached in a function-local static for later calls.
 */
int seccomp_ret_kill_process_available(void)
{
	/* -1 means "not probed yet". */
	static int cached = -1;

	if (cached != -1)
		return cached;

	cached = seccomp_action_is_available("kill_process");
	return cached;
}
548 
/*
 * sys_set_no_new_privs: Sets the no_new_privs attribute on the calling
 * process via prctl(PR_SET_NO_NEW_PRIVS).
 *
 * Returns true on success; logs a warning and returns false on failure.
 */
bool sys_set_no_new_privs(void)
{
	/*
	 * See </kernel/seccomp.c> and </kernel/sys.c> in the kernel source
	 * tree for an explanation of the parameters.
	 */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
		pwarn("prctl(PR_SET_NO_NEW_PRIVS) failed");
		return false;
	}

	return true;
}
562 
seccomp_filter_flags_available(unsigned int flags)563 bool seccomp_filter_flags_available(unsigned int flags)
564 {
565 	return sys_seccomp(SECCOMP_SET_MODE_FILTER, flags, NULL) != -1 ||
566 	       errno != EINVAL;
567 }
568 
is_canonical_path(const char * path)569 bool is_canonical_path(const char *path)
570 {
571 	attribute_cleanup_str char *rp = realpath(path, NULL);
572 	if (!rp) {
573 		pwarn("realpath(%s) failed", path);
574 		return false;
575 	}
576 
577 	if (streq(path, rp)) {
578 		return true;
579 	}
580 
581 	size_t path_len = strlen(path);
582 	size_t rp_len = strlen(rp);
583 	/* If |path| has a single trailing slash, that's OK. */
584 	return path_len == rp_len + 1 && strncmp(path, rp, rp_len) == 0 &&
585 	       path[path_len - 1] == '/';
586 }
587