• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include <dlfcn.h>
7 #include <errno.h>
8 #include <getopt.h>
9 #include <inttypes.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/capability.h>
15 #include <sys/mount.h>
16 #include <sys/types.h>
17 #include <unistd.h>
18 
19 #include <linux/filter.h>
20 
21 #include "libminijail.h"
22 #include "libsyscalls.h"
23 
24 #include "elfparse.h"
25 #include "minijail0_cli.h"
26 #include "system.h"
27 #include "util.h"
28 
29 #define IDMAP_LEN 32U
30 #define DEFAULT_TMP_SIZE (64 * 1024 * 1024)
31 
/*
 * Allocation wrapper for the CLI: hands back malloc()'s result, or reports
 * the failure with perror() and exits with status 1 when malloc() returns
 * NULL.  This lives in the CLI only; the library should return ENOMEM errors
 * when allocations fail.
 */
static void *xmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem != NULL)
		return mem;

	perror("malloc() failed");
	exit(1);
}
45 
/*
 * strdup() wrapper for the CLI: returns the duplicated string, or reports
 * the failure with perror() and exits with status 1 when strdup() returns
 * NULL.  The caller owns the returned buffer.
 */
static char *xstrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy != NULL)
		return copy;

	perror("strdup() failed");
	exit(1);
}
55 
/*
 * Handles -u: |arg| is either a numeric uid or a user name.
 *
 * When |arg| consists entirely of a base-10 number that fits in uid_t, it is
 * stored in |out_uid| and passed to minijail_change_uid(); |out_gid| is not
 * touched on that path.  Any other string is handed to lookup_user() (which
 * receives both |out_uid| and |out_gid|) and then to minijail_change_user().
 * A failure of either call prints a message to stderr and exits with
 * status 1.
 */
static void set_user(struct minijail *j, const char *arg, uid_t *out_uid,
		     gid_t *out_gid)
{
	char *end = NULL;

	/*
	 * Ids are integers: parse with strtoul() instead of strtod(), which
	 * would accept strings such as "1.5" or "1e3" and whose result cannot
	 * be converted safely once it is outside the range of the target type.
	 * Values that overflow or do not survive the round trip through uid_t
	 * are not treated as numeric ids.
	 */
	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (*arg && !*end && errno == 0 && (uid_t)parsed == parsed) {
		*out_uid = (uid_t)parsed;
		minijail_change_uid(j, (uid_t)parsed);
		return;
	}

	/* Errors are reported as negated errno values (see strerror(-ret)). */
	int ret = lookup_user(arg, out_uid, out_gid);
	if (ret) {
		fprintf(stderr, "Bad user '%s': %s\n", arg, strerror(-ret));
		exit(1);
	}

	ret = minijail_change_user(j, arg);
	if (ret) {
		fprintf(stderr, "minijail_change_user('%s') failed: %s\n", arg,
			strerror(-ret));
		exit(1);
	}
}
80 
/*
 * Handles -g: |arg| is either a numeric gid or a group name.
 *
 * When |arg| consists entirely of a base-10 number that fits in gid_t, it is
 * used directly; otherwise it is resolved with lookup_group(), and a failed
 * lookup prints a message to stderr and exits with status 1.  The resulting
 * gid is stored in |out_gid| and passed to minijail_change_gid().
 */
static void set_group(struct minijail *j, const char *arg, gid_t *out_gid)
{
	char *end = NULL;

	/*
	 * Ids are integers: parse with strtoul() instead of strtod(), which
	 * would accept strings such as "1.5" or "1e3".  Values that overflow
	 * or do not survive the round trip through gid_t are not treated as
	 * numeric ids.
	 */
	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (*arg && !*end && errno == 0 && (gid_t)parsed == parsed) {
		*out_gid = (gid_t)parsed;
		minijail_change_gid(j, (gid_t)parsed);
		return;
	}

	/* Errors are reported as negated errno values (see strerror(-ret)). */
	int ret = lookup_group(arg, out_gid);
	if (ret) {
		fprintf(stderr, "Bad group '%s': %s\n", arg, strerror(-ret));
		exit(1);
	}

	minijail_change_gid(j, *out_gid);
}
99 
/*
 * Helper function used by --add-suppl-group (possibly more than once),
 * to build the supplementary gids array.
 *
 * |arg| is either a numeric gid or a group name resolved with
 * lookup_group().  The gid is appended to |*suppl_gids|, which is grown with
 * realloc(), and |*suppl_gids_count| is incremented.  An unknown group or an
 * allocation failure prints a message to stderr and exits with status 1.
 */
static void suppl_group_add(size_t *suppl_gids_count, gid_t **suppl_gids,
			    char *arg)
{
	char *end = NULL;
	gid_t gid;
	int ret;

	/*
	 * Ids are integers: parse with strtoul() instead of strtod(), which
	 * would accept strings such as "1.5" or "1e3".
	 */
	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (*arg && !*end && errno == 0 && (gid_t)parsed == parsed) {
		/* A gid number has been specified, proceed. */
		gid = (gid_t)parsed;
	} else if ((ret = lookup_group(arg, &gid))) {
		/*
		 * A group name has been specified,
		 * but doesn't exist: we bail out.
		 */
		fprintf(stderr, "Bad group '%s': %s\n", arg, strerror(-ret));
		exit(1);
	}

	/*
	 * From here, gid is guaranteed to be set and valid,
	 * we add it to our supplementary gids array.
	 *
	 * The realloc() result goes into a temporary so that the failure
	 * check inspects the new buffer (not the caller's pointer-to-pointer,
	 * which is never NULL) and the caller's state is only updated on
	 * success.
	 */
	size_t new_count = *suppl_gids_count + 1;
	gid_t *new_gids = realloc(*suppl_gids, sizeof(gid_t) * new_count);
	if (!new_gids) {
		fprintf(stderr, "failed to allocate memory.\n");
		exit(1);
	}

	new_gids[new_count - 1] = gid;
	*suppl_gids = new_gids;
	*suppl_gids_count = new_count;
}
135 
/*
 * Handles -B: parses |arg| as a hexadecimal mask and passes it to
 * minijail_skip_setting_securebits().  An empty string, trailing garbage, or
 * a value too large for strtoull() prints a message to stderr and exits with
 * status 1.
 */
static void skip_securebits(struct minijail *j, const char *arg)
{
	char *end = NULL;

	errno = 0;
	uint64_t securebits_skip_mask = strtoull(arg, &end, 16);
	/*
	 * |end == arg| means no digits were consumed (e.g. an empty string),
	 * which must not be silently treated as a mask of 0.
	 */
	if (end == arg || *end || errno == ERANGE) {
		fprintf(stderr, "Invalid securebit mask: '%s'\n", arg);
		exit(1);
	}
	minijail_skip_setting_securebits(j, securebits_skip_mask);
}
147 
use_caps(struct minijail * j,const char * arg)148 static void use_caps(struct minijail *j, const char *arg)
149 {
150 	uint64_t caps = 0;
151 	cap_t parsed_caps = cap_from_text(arg);
152 
153 	if (parsed_caps != NULL) {
154 		unsigned int i;
155 		const uint64_t one = 1;
156 		cap_flag_value_t cap_value;
157 		unsigned int last_valid_cap = get_last_valid_cap();
158 
159 		for (i = 0; i <= last_valid_cap; ++i) {
160 			if (cap_get_flag(parsed_caps, i, CAP_EFFECTIVE,
161 					 &cap_value)) {
162 				if (errno == EINVAL) {
163 					/*
164 					 * Some versions of libcap reject any
165 					 * capabilities they were not compiled
166 					 * with by returning EINVAL.
167 					 */
168 					continue;
169 				}
170 				fprintf(stderr,
171 					"Could not get the value of "
172 					"the %d-th capability: %m\n",
173 					i);
174 				exit(1);
175 			}
176 			if (cap_value == CAP_SET)
177 				caps |= (one << i);
178 		}
179 		cap_free(parsed_caps);
180 	} else {
181 		char *end = NULL;
182 		caps = strtoull(arg, &end, 16);
183 		if (*end) {
184 			fprintf(stderr, "Invalid cap set: '%s'\n", arg);
185 			exit(1);
186 		}
187 	}
188 
189 	minijail_use_caps(j, caps);
190 }
191 
/*
 * Handles -b: |arg| has the form <src>[,[dest][,<writable>]] and is split in
 * place with tokenize().
 *
 * <src> is mandatory and no fields may follow <writable>.  A missing or
 * empty <dest> defaults to <src>.  <writable> must be absent, empty, "0"
 * (read-only) or "1" (writable).  The result is passed to minijail_bind().
 * Any error prints a message to stderr and exits with status 1.
 */
static void add_binding(struct minijail *j, char *arg)
{
	char *src = tokenize(&arg, ",");
	char *dest = tokenize(&arg, ",");
	char *flags = tokenize(&arg, ",");
	if (!src || src[0] == '\0' || arg != NULL) {
		/*
		 * |src| and |dest| may be NULL here; passing NULL for %s is
		 * undefined, so substitute a printable placeholder.
		 */
		fprintf(stderr, "Bad binding: %s %s\n", src ? src : "(null)",
			dest ? dest : "(null)");
		exit(1);
	}
	if (dest == NULL || dest[0] == '\0')
		dest = src;
	int writable;
	if (flags == NULL || flags[0] == '\0' || !strcmp(flags, "0"))
		writable = 0;
	else if (!strcmp(flags, "1"))
		writable = 1;
	else {
		fprintf(stderr, "Bad value for <writable>: %s\n", flags);
		exit(1);
	}
	if (minijail_bind(j, src, dest, writable)) {
		fprintf(stderr, "minijail_bind failed.\n");
		exit(1);
	}
}
217 
/*
 * Converts one limit field of an -R argument: "unlimited" yields
 * RLIM_INFINITY, anything else is parsed with strtoul() (base auto-detected).
 * |kind| is "soft" or "hard" and only used in the error message.  Trailing
 * garbage or an out-of-range value prints a message and exits with status 1.
 */
static rlim_t parse_rlimit_value(const char *str, const char *kind)
{
	if (!strcmp(str, "unlimited"))
		return RLIM_INFINITY;

	char *end = NULL;
	errno = 0;
	rlim_t value = strtoul(str, &end, 0);
	if (*end || errno == ERANGE) {
		fprintf(stderr, "Bad %s limit: '%s'.\n", kind, str);
		exit(1);
	}
	return value;
}

/*
 * Handles -R: |arg| has the form <type>,<cur>,<max> and is split in place
 * with tokenize().  All three fields are mandatory and nothing may follow
 * them.  <type> is resolved with parse_single_constant() and the limits are
 * passed to minijail_rlimit().  Any error prints a message to stderr and
 * exits with status 1.
 */
static void add_rlimit(struct minijail *j, char *arg)
{
	char *type = tokenize(&arg, ",");
	char *cur = tokenize(&arg, ",");
	char *max = tokenize(&arg, ",");
	char *end;
	if (!type || type[0] == '\0' || !cur || cur[0] == '\0' ||
	    !max || max[0] == '\0' || arg != NULL) {
		/*
		 * |arg| has been consumed by tokenize() and is either NULL or
		 * the unparsed tail, so report the fields that were seen
		 * instead (guarding against NULL for %s).
		 */
		fprintf(stderr, "Bad rlimit '%s,%s,%s'.\n", type ? type : "",
			cur ? cur : "", max ? max : "");
		exit(1);
	}

	rlim_t cur_rlim = parse_rlimit_value(cur, "soft");
	rlim_t max_rlim = parse_rlimit_value(max, "hard");

	end = NULL;
	int resource = parse_single_constant(type, &end);
	/* |end| still pointing at |type| means nothing was parsed. */
	if (type == end) {
		fprintf(stderr, "Bad rlimit: '%s'.\n", type);
		exit(1);
	}

	if (minijail_rlimit(j, resource, cur_rlim, max_rlim)) {
		fprintf(stderr, "minijail_rlimit '%s,%s,%s' failed.\n", type,
			cur, max);
		exit(1);
	}
}
265 
/*
 * Handles -k: |arg| has the form <src>,<dest>,<type>[,<flags>[,<data>]] and
 * is split in place with tokenize().
 *
 * <src>, <dest> and <type> are mandatory.  A missing or empty <flags> means
 * 0; otherwise it is resolved with parse_constant().  <data> may itself
 * contain commas.  The result is passed to minijail_mount_with_data().  Any
 * error prints a message to stderr and exits with status 1.
 */
static void add_mount(struct minijail *j, char *arg)
{
	char *src = tokenize(&arg, ",");
	char *dest = tokenize(&arg, ",");
	char *type = tokenize(&arg, ",");
	char *flags = tokenize(&arg, ",");
	char *data = tokenize(&arg, ",");
	if (!src || src[0] == '\0' || !dest || dest[0] == '\0' ||
	    !type || type[0] == '\0') {
		/*
		 * Any of the fields may be NULL here; passing NULL for %s is
		 * undefined, so substitute a printable placeholder.
		 */
		fprintf(stderr, "Bad mount: %s %s %s\n",
			src ? src : "(null)", dest ? dest : "(null)",
			type ? type : "(null)");
		exit(1);
	}

	/*
	 * Fun edge case: the data option itself is comma delimited.  If there
	 * were no more options, then arg would be set to NULL.  But if we had
	 * more pending, it'll be pointing to the next token.  Back up and undo
	 * the null byte so it'll be merged back.
	 * An example:
	 *   none,/tmp,tmpfs,0xe,mode=0755,uid=10,gid=10
	 * The tokenize calls above will turn this memory into:
	 *   none\0/tmp\0tmpfs\00xe\0mode=0755\0uid=10,gid=10
	 * With data pointing at mode=0755 and arg pointing at uid=10,gid=10.
	 */
	if (arg != NULL)
		arg[-1] = ',';

	unsigned long mountflags;
	if (flags == NULL || flags[0] == '\0') {
		mountflags = 0;
	} else {
		char *end = NULL;
		mountflags = parse_constant(flags, &end);
		/* |end| still pointing at |flags| means nothing was parsed. */
		if (flags == end) {
			fprintf(stderr, "Bad mount flags: %s\n", flags);
			exit(1);
		}
	}

	if (minijail_mount_with_data(j, src, dest, type,
				     mountflags, data)) {
		fprintf(stderr, "minijail_mount failed.\n");
		exit(1);
	}
}
312 
/*
 * Builds a single-entry id map string of the form "<id> <lowerid> 1" in a
 * freshly allocated buffer of IDMAP_LEN bytes.  The caller owns (and must
 * free) the result.  If the formatted string does not fit, a message is
 * printed to stderr and the process exits with status 1.
 */
static char *build_idmap(id_t id, id_t lowerid)
{
	int ret;
	char *idmap = xmalloc(IDMAP_LEN);
	/*
	 * Ids are not 'int': format them as unsigned values (via uintmax_t so
	 * the conversion matches whatever width id_t has) instead of %d, which
	 * would print large ids as negative numbers.
	 */
	ret = snprintf(idmap, IDMAP_LEN, "%ju %ju 1", (uintmax_t)id,
		       (uintmax_t)lowerid);
	if (ret < 0 || (size_t)ret >= IDMAP_LEN) {
		free(idmap);
		fprintf(stderr, "Could not build id map.\n");
		exit(1);
	}
	return idmap;
}
325 
has_cap_setgid(void)326 static int has_cap_setgid(void)
327 {
328 	cap_t caps;
329 	cap_flag_value_t cap_value;
330 
331 	if (!CAP_IS_SUPPORTED(CAP_SETGID))
332 		return 0;
333 
334 	caps = cap_get_proc();
335 	if (!caps) {
336 		fprintf(stderr, "Could not get process' capabilities: %m\n");
337 		exit(1);
338 	}
339 
340 	if (cap_get_flag(caps, CAP_SETGID, CAP_EFFECTIVE, &cap_value)) {
341 		fprintf(stderr, "Could not get the value of CAP_SETGID: %m\n");
342 		exit(1);
343 	}
344 
345 	if (cap_free(caps)) {
346 		fprintf(stderr, "Could not free capabilities: %m\n");
347 		exit(1);
348 	}
349 
350 	return cap_value == CAP_SET;
351 }
352 
/*
 * Applies the uid and/or gid maps requested on the command line.
 *
 * For each of |set_uidmap| / |set_gidmap| that is non-zero, user and pid
 * namespaces are requested and the corresponding map is installed.  A NULL
 * |uidmap| / |gidmap| is replaced by a one-entry map built with
 * build_idmap() from |uid| / |gid| and the current getuid() / getgid().
 * The map buffer is freed after use, whether it was passed in or built here.
 * A failure to install a map prints a message and exits with status 1.
 */
static void set_ugid_mapping(struct minijail *j, int set_uidmap, uid_t uid,
			     char *uidmap, int set_gidmap, gid_t gid,
			     char *gidmap)
{
	if (set_uidmap) {
		minijail_namespace_user(j);
		minijail_namespace_pids(j);

		/*
		 * Without an explicit map, map the current uid to the chosen
		 * uid in the target namespace (or root, if none was chosen).
		 */
		char *umap = uidmap ? uidmap : build_idmap(uid, getuid());

		if (minijail_uidmap(j, umap) != 0) {
			fprintf(stderr, "Could not set uid map.\n");
			exit(1);
		}
		free(umap);
	}

	if (set_gidmap) {
		minijail_namespace_user(j);
		minijail_namespace_pids(j);

		/*
		 * Without an explicit map, map the current gid to the chosen
		 * gid in the target namespace.
		 */
		char *gmap = gidmap ? gidmap : build_idmap(gid, getgid());

		/*
		 * Lacking CAP_SETGID means that we are not running as root,
		 * so we also have to disable setgroups(2) to be able to set
		 * the gid map.
		 * See
		 * http://man7.org/linux/man-pages/man7/user_namespaces.7.html
		 */
		if (!has_cap_setgid())
			minijail_namespace_user_disable_setgroups(j);

		if (minijail_gidmap(j, gmap) != 0) {
			fprintf(stderr, "Could not set gid map.\n");
			exit(1);
		}
		free(gmap);
	}
}
403 
/*
 * Handles -C: asks minijail to chroot into |path| and records that in
 * |*chroot|.  Exits with status 1 if |pivot_root| is already set (the two
 * options are mutually exclusive) or if minijail_enter_chroot() fails.
 */
static void use_chroot(struct minijail *j, const char *path, int *chroot,
		       int pivot_root)
{
	const char *failure = NULL;

	if (pivot_root)
		failure = "Could not set chroot because '-P' was specified.\n";
	else if (minijail_enter_chroot(j, path))
		failure = "Could not set chroot.\n";

	if (failure != NULL) {
		fputs(failure, stderr);
		exit(1);
	}

	*chroot = 1;
}
418 
/*
 * Handles -P: asks minijail to pivot_root into |path|, requests a new mount
 * namespace, and records the choice in |*pivot_root|.  Exits with status 1
 * if |chroot| is already set (the two options are mutually exclusive) or if
 * minijail_enter_pivot_root() fails.
 */
static void use_pivot_root(struct minijail *j, const char *path,
			   int *pivot_root, int chroot)
{
	const char *failure = NULL;

	if (chroot)
		failure =
		    "Could not set pivot_root because '-C' was specified.\n";
	else if (minijail_enter_pivot_root(j, path))
		failure = "Could not set pivot_root.\n";

	if (failure != NULL) {
		fputs(failure, stderr);
		exit(1);
	}

	minijail_namespace_vfs(j);
	*pivot_root = 1;
}
434 
use_profile(struct minijail * j,const char * profile,int * pivot_root,int chroot,size_t * tmp_size)435 static void use_profile(struct minijail *j, const char *profile,
436 			int *pivot_root, int chroot, size_t *tmp_size)
437 {
438 	/* Note: New profiles should be added in minijail0_cli_unittest.cc. */
439 
440 	if (!strcmp(profile, "minimalistic-mountns") ||
441 	    !strcmp(profile, "minimalistic-mountns-nodev")) {
442 		minijail_namespace_vfs(j);
443 		if (minijail_bind(j, "/", "/", 0)) {
444 			fprintf(stderr, "minijail_bind(/) failed.\n");
445 			exit(1);
446 		}
447 		if (minijail_bind(j, "/proc", "/proc", 0)) {
448 			fprintf(stderr, "minijail_bind(/proc) failed.\n");
449 			exit(1);
450 		}
451 		if (!strcmp(profile, "minimalistic-mountns")) {
452 			if (minijail_bind(j, "/dev/log", "/dev/log", 0)) {
453 				fprintf(stderr, "minijail_bind(/dev/log) failed.\n");
454 				exit(1);
455 			}
456 			minijail_mount_dev(j);
457 		}
458 		if (!*tmp_size) {
459 			/* Avoid clobbering |tmp_size| if it was already set. */
460 			*tmp_size = DEFAULT_TMP_SIZE;
461 		}
462 		minijail_remount_proc_readonly(j);
463 		use_pivot_root(j, DEFAULT_PIVOT_ROOT, pivot_root, chroot);
464 	} else {
465 		fprintf(stderr, "Unrecognized profile name '%s'\n", profile);
466 		exit(1);
467 	}
468 }
469 
/*
 * Translates the mount propagation name |mode| ("shared", "private",
 * "slave" or "unbindable") into the matching MS_* flag and passes it to
 * minijail_remount_mode().  Any other name prints a message to stderr and
 * exits with status 1.
 */
static void set_remount_mode(struct minijail *j, const char *mode)
{
	static const struct {
		const char *name;
		unsigned long flag;
	} known_modes[] = {
		{"shared", MS_SHARED},
		{"private", MS_PRIVATE},
		{"slave", MS_SLAVE},
		{"unbindable", MS_UNBINDABLE},
	};
	const size_t count = sizeof(known_modes) / sizeof(known_modes[0]);

	for (size_t idx = 0; idx < count; ++idx) {
		if (strcmp(mode, known_modes[idx].name) == 0) {
			minijail_remount_mode(j, known_modes[idx].flag);
			return;
		}
	}

	fprintf(stderr, "Unknown remount mode: '%s'\n", mode);
	exit(1);
}
487 
/*
 * Loads a pre-compiled seccomp BPF program from |filter_path| into |filter|.
 *
 * The file size must be a multiple of sizeof(struct sock_filter).  On
 * success |filter->len| holds the number of instructions and
 * |filter->filter| points to a buffer obtained from xmalloc() that holds the
 * file contents; the caller owns that buffer.  Any failure prints a message
 * to stderr and exits with status 1.
 */
static void read_seccomp_filter(const char *filter_path,
				struct sock_fprog *filter)
{
	FILE *f = fopen(filter_path, "re");
	if (!f) {
		fprintf(stderr, "failed to open %s: %s\n", filter_path,
			strerror(errno));
		exit(1);
	}

	off_t filter_size = 0;
	if (fseeko(f, 0, SEEK_END) == -1 || (filter_size = ftello(f)) == -1) {
		/* Capture errno before fclose() has a chance to change it. */
		int saved_errno = errno;
		fclose(f);
		fprintf(stderr, "failed to get file size of %s: %s\n",
			filter_path, strerror(saved_errno));
		exit(1);
	}
	if (filter_size % sizeof(struct sock_filter) != 0) {
		fclose(f);
		/*
		 * Not an errno-style failure, so no error string is appended.
		 * off_t is cast because its width need not match PRId64.
		 */
		fprintf(stderr,
			"filter size (%" PRId64
			") of %s is not a multiple of %zu\n",
			(int64_t)filter_size, filter_path,
			sizeof(struct sock_filter));
		exit(1);
	}
	rewind(f);

	/*
	 * Make sure the instruction count survives being stored in
	 * |filter->len|; otherwise only part of the file would be loaded.
	 */
	size_t insn_count = (size_t)filter_size / sizeof(struct sock_filter);
	filter->len = insn_count;
	if ((size_t)filter->len != insn_count) {
		fclose(f);
		fprintf(stderr, "filter %s has too many instructions (%zu)\n",
			filter_path, insn_count);
		exit(1);
	}

	filter->filter = xmalloc(filter_size);
	if (fread(filter->filter, sizeof(struct sock_filter), filter->len, f) !=
	    filter->len) {
		int saved_errno = errno;
		int read_error = ferror(f);
		fclose(f);
		/* A short read without a stream error is a truncated file. */
		fprintf(stderr, "failed to read %s: %s\n", filter_path,
			read_error ? strerror(saved_errno)
				   : "unexpected end of file");
		exit(1);
	}
	fclose(f);
}
523 
usage(const char * progn)524 static void usage(const char *progn)
525 {
526 	size_t i;
527 	/* clang-format off */
528 	printf("Usage: %s [-dGhHiIKlLnNprRstUvyYz]\n"
529 	       "  [-a <table>]\n"
530 	       "  [-b <src>[,[dest][,<writeable>]]] [-k <src>,<dest>,<type>[,<flags>[,<data>]]]\n"
531 	       "  [-c <caps>] [-C <dir>] [-P <dir>] [-e[file]] [-f <file>] [-g <group>]\n"
532 	       "  [-m[<uid> <loweruid> <count>]*] [-M[<gid> <lowergid> <count>]*] [--profile <name>]\n"
533 	       "  [-R <type,cur,max>] [-S <file>] [-t[size]] [-T <type>] [-u <user>] [-V <file>]\n"
534 	       "  <program> [args...]\n"
535 	       "  -a <table>:   Use alternate syscall table <table>.\n"
536 	       "  -b <...>:     Bind <src> to <dest> in chroot.\n"
537 	       "                Multiple instances allowed.\n"
538 	       "  -B <mask>:    Skip setting securebits in <mask> when restricting capabilities (-c).\n"
539 	       "                By default, SECURE_NOROOT, SECURE_NO_SETUID_FIXUP, and \n"
540 	       "                SECURE_KEEP_CAPS (together with their respective locks) are set.\n"
541 	       "                There are eight securebits in total.\n"
542 	       "  -k <...>:     Mount <src> at <dest> in chroot.\n"
543 	       "                <flags> and <data> can be specified as in mount(2).\n"
544 	       "                Multiple instances allowed.\n"
545 	       "  -c <caps>:    Restrict caps to <caps>.\n"
546 	       "  -C <dir>:     chroot(2) to <dir>.\n"
547 	       "                Not compatible with -P.\n"
548 	       "  -P <dir>:     pivot_root(2) to <dir> (implies -v).\n"
549 	       "                Not compatible with -C.\n"
550 	       "  --mount-dev,  Create a new /dev with a minimal set of device nodes (implies -v).\n"
551 	       "           -d:  See the minijail0(1) man page for the exact set.\n"
552 	       "  -e[file]:     Enter new network namespace, or existing one if |file| is provided.\n"
553 	       "  -f <file>:    Write the pid of the jailed process to <file>.\n"
554 	       "  -g <group>:   Change gid to <group>.\n"
555 	       "  -G:           Inherit supplementary groups from new uid.\n"
556 	       "                Not compatible with -y or --add-suppl-group.\n"
557 	       "  -y:           Keep original uid's supplementary groups.\n"
558 	       "                Not compatible with -G or --add-suppl-group.\n"
559 	       "  --add-suppl-group <g>:Add <g> to the proccess' supplementary groups,\n"
560 	       "                can be specified multiple times to add several groups.\n"
561 	       "                Not compatible with -y or -G.\n"
562 	       "  -h:           Help (this message).\n"
563 	       "  -H:           Seccomp filter help message.\n"
564 	       "  -i:           Exit immediately after fork(2). The jailed process will run\n"
565 	       "                in the background.\n"
566 	       "  -I:           Run <program> as init (pid 1) inside a new pid namespace (implies -p).\n"
567 	       "  -K:           Do not change share mode of any existing mounts.\n"
568 	       "  -K<mode>:     Mark all existing mounts as <mode> instead of MS_PRIVATE.\n"
569 	       "  -l:           Enter new IPC namespace.\n"
570 	       "  -L:           Report blocked syscalls when using seccomp filter.\n"
571 	       "                If the kernel does not support SECCOMP_RET_LOG,\n"
572 	       "                forces the following syscalls to be allowed:\n"
573 	       "                  ", progn);
574 	/* clang-format on */
575 	for (i = 0; i < log_syscalls_len; i++)
576 		printf("%s ", log_syscalls[i]);
577 
578 	/* clang-format off */
579 	printf("\n"
580 	       "  -m[map]:      Set the uid map of a user namespace (implies -pU).\n"
581 	       "                Same arguments as newuidmap(1), multiple mappings should be separated by ',' (comma).\n"
582 	       "                With no mapping, map the current uid to root inside the user namespace.\n"
583 	       "                Not compatible with -b without the 'writable' option.\n"
584 	       "  -M[map]:      Set the gid map of a user namespace (implies -pU).\n"
585 	       "                Same arguments as newgidmap(1), multiple mappings should be separated by ',' (comma).\n"
586 	       "                With no mapping, map the current gid to root inside the user namespace.\n"
587 	       "                Not compatible with -b without the 'writable' option.\n"
588 	       "  -n:           Set no_new_privs.\n"
589 	       "  -N:           Enter a new cgroup namespace.\n"
590 	       "  -p:           Enter new pid namespace (implies -vr).\n"
591 	       "  -r:           Remount /proc read-only (implies -v).\n"
592 	       "  -R:           Set rlimits, can be specified multiple times.\n"
593 	       "  -s:           Use seccomp mode 1 (not the same as -S).\n"
594 	       "  -S <file>:    Set seccomp filter using <file>.\n"
595 	       "                E.g., '-S /usr/share/filters/<prog>.$(uname -m)'.\n"
596 	       "                Requires -n when not running as root.\n"
597 	       "  -t[size]:     Mount tmpfs at /tmp (implies -v).\n"
598 	       "                Optional argument specifies size (default \"64M\").\n"
599 	       "  -T <type>:    Assume <program> is a <type> ELF binary; <type> can be 'static' or 'dynamic'.\n"
600 	       "                This will avoid accessing <program> binary before execve(2).\n"
601 	       "                Type 'static' will avoid preload hooking.\n"
602 	       "  -u <user>:    Change uid to <user>.\n"
603 	       "  -U:           Enter new user namespace (implies -p).\n"
604 	       "  -v:           Enter new mount namespace.\n"
605 	       "  -V <file>:    Enter specified mount namespace.\n"
606 	       "  -w:           Create and join a new anonymous session keyring.\n"
607 	       "  -Y:           Synchronize seccomp filters across thread group.\n"
608 	       "  -z:           Don't forward signals to jailed process.\n"
609 	       "  --ambient:    Raise ambient capabilities. Requires -c.\n"
610 	       "  --uts[=name]: Enter a new UTS namespace (and set hostname).\n"
611 	       "  --logging=<s>:Use <s> as the logging system.\n"
612 	       "                <s> must be 'auto' (default), 'syslog', or 'stderr'.\n"
613 	       "  --profile <p>:Configure minijail0 to run with the <p> sandboxing profile,\n"
614 	       "                which is a convenient way to express multiple flags\n"
615 	       "                that are typically used together.\n"
616 	       "                See the minijail0(1) man page for the full list.\n"
617 	       "  --preload-library=<f>:Overrides the path to \"" PRELOADPATH "\".\n"
618 	       "                This is only really useful for local testing.\n"
619 	       "  --seccomp-bpf-binary=<f>:Set a pre-compiled seccomp filter using <f>.\n"
620 	       "                E.g., '-S /usr/share/filters/<prog>.$(uname -m).bpf'.\n"
621 	       "                Requires -n when not running as root.\n"
622 	       "                The user is responsible for ensuring that the binary\n"
623 	       "                was compiled for the correct architecture / kernel version.\n"
624 	       "  --allow-speculative-execution:Allow speculative execution and disable\n"
625 	       "                mitigations for speculative execution attacks.\n");
626 	/* clang-format on */
627 }
628 
seccomp_filter_usage(const char * progn)629 static void seccomp_filter_usage(const char *progn)
630 {
631 	const struct syscall_entry *entry = syscall_table;
632 	printf("Usage: %s -S <policy.file> <program> [args...]\n\n"
633 	       "System call names supported:\n",
634 	       progn);
635 	for (; entry->name && entry->nr >= 0; ++entry)
636 		printf("  %s [%d]\n", entry->name, entry->nr);
637 	printf("\nSee minijail0(5) for example policies.\n");
638 }
639 
parse_args(struct minijail * j,int argc,char * const argv[],int * exit_immediately,ElfType * elftype,const char ** preload_path)640 int parse_args(struct minijail *j, int argc, char *const argv[],
641 	       int *exit_immediately, ElfType *elftype,
642 	       const char **preload_path)
643 {
644 	int opt;
645 	int use_seccomp_filter = 0, use_seccomp_filter_binary = 0;
646 	int forward = 1;
647 	int binding = 0;
648 	int chroot = 0, pivot_root = 0;
649 	int mount_ns = 0, change_remount = 0;
650 	const char *remount_mode = NULL;
651 	int inherit_suppl_gids = 0, keep_suppl_gids = 0;
652 	int caps = 0, ambient_caps = 0;
653 	int seccomp = -1;
654 	bool use_uid = false, use_gid = false;
655 	uid_t uid = 0;
656 	gid_t gid = 0;
657 	gid_t *suppl_gids = NULL;
658 	size_t suppl_gids_count = 0;
659 	char *uidmap = NULL, *gidmap = NULL;
660 	int set_uidmap = 0, set_gidmap = 0;
661 	size_t tmp_size = 0;
662 	const char *filter_path = NULL;
663 	int log_to_stderr = -1;
664 
665 	const char *optstring =
666 	    "+u:g:sS:c:C:P:b:B:V:f:m::M::k:a:e::R:T:vrGhHinNplLt::IUK::wyYzd";
667 	/* clang-format off */
668 	const struct option long_options[] = {
669 		{"help", no_argument, 0, 'h'},
670 		{"mount-dev", no_argument, 0, 'd'},
671 		{"ambient", no_argument, 0, 128},
672 		{"uts", optional_argument, 0, 129},
673 		{"logging", required_argument, 0, 130},
674 		{"profile", required_argument, 0, 131},
675 		{"preload-library", required_argument, 0, 132},
676 		{"seccomp-bpf-binary", required_argument, 0, 133},
677 		{"add-suppl-group", required_argument, 0, 134},
678 		{"allow-speculative-execution", no_argument, 0, 135},
679 		{0, 0, 0, 0},
680 	};
681 	/* clang-format on */
682 
683 	while ((opt = getopt_long(argc, argv, optstring, long_options, NULL)) !=
684 	       -1) {
685 		switch (opt) {
686 		case 'u':
687 			if (use_uid) {
688 				fprintf(stderr,
689 					"-u provided multiple times.\n");
690 				exit(1);
691 			}
692 			use_uid = true;
693 			set_user(j, optarg, &uid, &gid);
694 			break;
695 		case 'g':
696 			if (use_gid) {
697 				fprintf(stderr,
698 					"-g provided multiple times.\n");
699 				exit(1);
700 			}
701 			use_gid = true;
702 			set_group(j, optarg, &gid);
703 			break;
704 		case 'n':
705 			minijail_no_new_privs(j);
706 			break;
707 		case 's':
708 			if (seccomp != -1 && seccomp != 1) {
709 				fprintf(stderr,
710 					"Do not use -s, -S, or "
711 					"--seccomp-bpf-binary together.\n");
712 				exit(1);
713 			}
714 			seccomp = 1;
715 			minijail_use_seccomp(j);
716 			break;
717 		case 'S':
718 			if (seccomp != -1 && seccomp != 2) {
719 				fprintf(stderr,
720 					"Do not use -s, -S, or "
721 					"--seccomp-bpf-binary together.\n");
722 				exit(1);
723 			}
724 			seccomp = 2;
725 			minijail_use_seccomp_filter(j);
726 			filter_path = optarg;
727 			use_seccomp_filter = 1;
728 			break;
729 		case 'l':
730 			minijail_namespace_ipc(j);
731 			break;
732 		case 'L':
733 			minijail_log_seccomp_filter_failures(j);
734 			break;
735 		case 'b':
736 			add_binding(j, optarg);
737 			binding = 1;
738 			break;
739 		case 'B':
740 			skip_securebits(j, optarg);
741 			break;
742 		case 'c':
743 			caps = 1;
744 			use_caps(j, optarg);
745 			break;
746 		case 'C':
747 			use_chroot(j, optarg, &chroot, pivot_root);
748 			break;
749 		case 'k':
750 			add_mount(j, optarg);
751 			break;
752 		case 'K':
753 			remount_mode = optarg;
754 			change_remount = 1;
755 			break;
756 		case 'P':
757 			use_pivot_root(j, optarg, &pivot_root, chroot);
758 			break;
759 		case 'f':
760 			if (0 != minijail_write_pid_file(j, optarg)) {
761 				fprintf(stderr,
762 					"Could not prepare pid file path.\n");
763 				exit(1);
764 			}
765 			break;
766 		case 't':
767 			minijail_namespace_vfs(j);
768 			if (!tmp_size) {
769 				/*
770 				 * Avoid clobbering |tmp_size| if it was already
771 				 * set.
772 				 */
773 				tmp_size = DEFAULT_TMP_SIZE;
774 			}
775 			if (optarg != NULL &&
776 			    0 != parse_size(&tmp_size, optarg)) {
777 				fprintf(stderr, "Invalid /tmp tmpfs size.\n");
778 				exit(1);
779 			}
780 			break;
781 		case 'v':
782 			minijail_namespace_vfs(j);
783 			/*
784 			 * Set the default mount propagation in the command-line
785 			 * tool to MS_SLAVE.
786 			 *
787 			 * When executing the sandboxed program in a new mount
788 			 * namespace the Minijail library will by default
789 			 * remount all mounts with the MS_PRIVATE flag. While
790 			 * this is an appropriate, safe default for the library,
791 			 * MS_PRIVATE can be problematic: unmount events will
792 			 * not propagate into mountpoints marked as MS_PRIVATE.
793 			 * This means that if a mount is unmounted in the root
794 			 * mount namespace, it will not be unmounted in the
795 			 * non-root mount namespace.
796 			 * This in turn can be problematic because activity in
797 			 * the non-root mount namespace can now directly
798 			 * influence the root mount namespace (e.g. preventing
799 			 * re-mounts of said mount), which would be a privilege
800 			 * inversion.
801 			 *
802 			 * Setting the default in the command-line to MS_SLAVE
803 			 * will still prevent mounts from leaking out of the
804 			 * non-root mount namespace but avoid these
805 			 * privilege-inversion issues.
806 			 * For cases where mounts should not flow *into* the
807 			 * namespace either, the user can pass -Kprivate.
808 			 * Note that mounts are marked as MS_PRIVATE by default
809 			 * by the kernel, so unless the init process (like
810 			 * systemd) or something else marks them as shared, this
811 			 * won't do anything.
812 			 */
813 			minijail_remount_mode(j, MS_SLAVE);
814 			mount_ns = 1;
815 			break;
816 		case 'V':
817 			minijail_namespace_enter_vfs(j, optarg);
818 			break;
819 		case 'r':
820 			minijail_remount_proc_readonly(j);
821 			break;
822 		case 'G':
823 			if (keep_suppl_gids) {
824 				fprintf(stderr,
825 					"-y and -G are not compatible.\n");
826 				exit(1);
827 			}
828 			minijail_inherit_usergroups(j);
829 			inherit_suppl_gids = 1;
830 			break;
831 		case 'y':
832 			if (inherit_suppl_gids) {
833 				fprintf(stderr,
834 					"-y and -G are not compatible.\n");
835 				exit(1);
836 			}
837 			minijail_keep_supplementary_gids(j);
838 			keep_suppl_gids = 1;
839 			break;
840 		case 'N':
841 			minijail_namespace_cgroups(j);
842 			break;
843 		case 'p':
844 			minijail_namespace_pids(j);
845 			break;
846 		case 'e':
847 			if (optarg)
848 				minijail_namespace_enter_net(j, optarg);
849 			else
850 				minijail_namespace_net(j);
851 			break;
852 		case 'i':
853 			*exit_immediately = 1;
854 			break;
855 		case 'H':
856 			seccomp_filter_usage(argv[0]);
857 			exit(0);
858 		case 'I':
859 			minijail_namespace_pids(j);
860 			minijail_run_as_init(j);
861 			break;
862 		case 'U':
863 			minijail_namespace_user(j);
864 			minijail_namespace_pids(j);
865 			break;
866 		case 'm':
867 			set_uidmap = 1;
868 			if (uidmap) {
869 				free(uidmap);
870 				uidmap = NULL;
871 			}
872 			if (optarg)
873 				uidmap = xstrdup(optarg);
874 			break;
875 		case 'M':
876 			set_gidmap = 1;
877 			if (gidmap) {
878 				free(gidmap);
879 				gidmap = NULL;
880 			}
881 			if (optarg)
882 				gidmap = xstrdup(optarg);
883 			break;
884 		case 'a':
885 			if (0 != minijail_use_alt_syscall(j, optarg)) {
886 				fprintf(stderr,
887 					"Could not set alt-syscall table.\n");
888 				exit(1);
889 			}
890 			break;
891 		case 'R':
892 			add_rlimit(j, optarg);
893 			break;
894 		case 'T':
895 			if (!strcmp(optarg, "static"))
896 				*elftype = ELFSTATIC;
897 			else if (!strcmp(optarg, "dynamic"))
898 				*elftype = ELFDYNAMIC;
899 			else {
900 				fprintf(stderr, "ELF type must be 'static' or "
901 						"'dynamic'.\n");
902 				exit(1);
903 			}
904 			break;
905 		case 'w':
906 			minijail_new_session_keyring(j);
907 			break;
908 		case 'Y':
909 			minijail_set_seccomp_filter_tsync(j);
910 			break;
911 		case 'z':
912 			forward = 0;
913 			break;
914 		case 'd':
915 			minijail_namespace_vfs(j);
916 			minijail_mount_dev(j);
917 			break;
918 		/* Long options. */
919 		case 128: /* Ambient caps. */
920 			ambient_caps = 1;
921 			minijail_set_ambient_caps(j);
922 			break;
923 		case 129: /* UTS/hostname namespace. */
924 			minijail_namespace_uts(j);
925 			if (optarg)
926 				minijail_namespace_set_hostname(j, optarg);
927 			break;
928 		case 130: /* Logging. */
929 			if (!strcmp(optarg, "auto")) {
930 				log_to_stderr = -1;
931 			} else if (!strcmp(optarg, "syslog")) {
932 				log_to_stderr = 0;
933 			} else if (!strcmp(optarg, "stderr")) {
934 				log_to_stderr = 1;
935 			} else {
936 				fprintf(stderr, "--logger must be 'syslog' or "
937 						"'stderr'.\n");
938 				exit(1);
939 			}
940 			break;
941 		case 131: /* Profile */
942 			use_profile(j, optarg, &pivot_root, chroot, &tmp_size);
943 			break;
944 		case 132: /* PRELOADPATH */
945 			*preload_path = optarg;
946 			break;
947 		case 133: /* seccomp-bpf binary. */
948 			if (seccomp != -1 && seccomp != 3) {
949 				fprintf(stderr,
950 					"Do not use -s, -S, or "
951 					"--seccomp-bpf-binary together.\n");
952 				exit(1);
953 			}
954 			seccomp = 3;
955 			minijail_use_seccomp_filter(j);
956 			filter_path = optarg;
957 			use_seccomp_filter_binary = 1;
958 			break;
959 		case 134:
960 			suppl_group_add(&suppl_gids_count, &suppl_gids,
961 			                optarg);
962 			break;
963 		case 135:
964 			minijail_set_seccomp_filter_allow_speculation(j);
965 			break;
966 		default:
967 			usage(argv[0]);
968 			exit(opt == 'h' ? 0 : 1);
969 		}
970 	}
971 
972 	if (log_to_stderr == -1) {
973 		/* Autodetect default logging output. */
974 		log_to_stderr = isatty(STDIN_FILENO) ? 1 : 0;
975 	}
976 	if (log_to_stderr) {
977 		init_logging(LOG_TO_FD, STDERR_FILENO, LOG_INFO);
978 		/*
979 		 * When logging to stderr, ensure the FD survives the jailing.
980 		 */
981 		if (0 !=
982 		    minijail_preserve_fd(j, STDERR_FILENO, STDERR_FILENO)) {
983 			fprintf(stderr, "Could not preserve stderr.\n");
984 			exit(1);
985 		}
986 	}
987 
988 	/* Set up uid/gid mapping. */
989 	if (set_uidmap || set_gidmap) {
990 		set_ugid_mapping(j, set_uidmap, uid, uidmap, set_gidmap, gid,
991 				 gidmap);
992 	}
993 
994 	/* Can only set ambient caps when using regular caps. */
995 	if (ambient_caps && !caps) {
996 		fprintf(stderr, "Can't set ambient capabilities (--ambient) "
997 				"without actually using capabilities (-c).\n");
998 		exit(1);
999 	}
1000 
1001 	/* Set up signal handlers in minijail unless asked not to. */
1002 	if (forward)
1003 		minijail_forward_signals(j);
1004 
1005 	/*
1006 	 * Only allow bind mounts when entering a chroot, using pivot_root, or
1007 	 * a new mount namespace.
1008 	 */
1009 	if (binding && !(chroot || pivot_root || mount_ns)) {
1010 		fprintf(stderr, "Bind mounts require a chroot, pivot_root, or "
1011 				" new mount namespace.\n");
1012 		exit(1);
1013 	}
1014 
1015 	/*
1016 	 * / is only remounted when entering a new mount namespace, so unless
1017 	 * that's set there is no need for the -K/-K<mode> flags.
1018 	 */
1019 	if (change_remount && !mount_ns) {
1020 		fprintf(stderr, "No need to use -K (skip remounting '/') or "
1021 				"-K<mode> (remount '/' as <mode>)\n"
1022 				"without -v (new mount namespace).\n"
1023 				"Do you need to add '-v' explicitly?\n");
1024 		exit(1);
1025 	}
1026 
1027 	/* Configure the remount flag here to avoid having -v override it. */
1028 	if (change_remount) {
1029 		if (remount_mode != NULL) {
1030 			set_remount_mode(j, remount_mode);
1031 		} else {
1032 			minijail_skip_remount_private(j);
1033 		}
1034 	}
1035 
1036 	/*
1037 	 * Proceed in setting the supplementary gids specified on the
1038 	 * cmdline options.
1039 	 */
1040 	if (suppl_gids_count) {
1041 		minijail_set_supplementary_gids(j, suppl_gids_count,
1042 		                                suppl_gids);
1043 		free(suppl_gids);
1044 	}
1045 
1046 	/*
1047 	 * We parse seccomp filters here to make sure we've collected all
1048 	 * cmdline options.
1049 	 */
1050 	if (use_seccomp_filter) {
1051 		minijail_parse_seccomp_filters(j, filter_path);
1052 	} else if (use_seccomp_filter_binary) {
1053 		struct sock_fprog filter;
1054 		read_seccomp_filter(filter_path, &filter);
1055 		minijail_set_seccomp_filters(j, &filter);
1056 		free((void *)filter.filter);
1057 	}
1058 
1059 	/* Mount a tmpfs under /tmp and set its size. */
1060 	if (tmp_size)
1061 		minijail_mount_tmp_size(j, tmp_size);
1062 
1063 	/*
1064 	 * There should be at least one additional unparsed argument: the
1065 	 * executable name.
1066 	 */
1067 	if (argc == optind) {
1068 		usage(argv[0]);
1069 		exit(1);
1070 	}
1071 
1072 	if (*elftype == ELFERROR) {
1073 		/*
1074 		 * -T was not specified.
1075 		 * Get the path to the program adjusted for changing root.
1076 		 */
1077 		char *program_path =
1078 		    minijail_get_original_path(j, argv[optind]);
1079 
1080 		/* Check that we can access the target program. */
1081 		if (access(program_path, X_OK)) {
1082 			fprintf(stderr,
1083 				"Target program '%s' is not accessible.\n",
1084 				argv[optind]);
1085 			exit(1);
1086 		}
1087 
1088 		/* Check if target is statically or dynamically linked. */
1089 		*elftype = get_elf_linkage(program_path);
1090 		free(program_path);
1091 	}
1092 
1093 	/*
1094 	 * Setting capabilities needs either a dynamically-linked binary, or the
1095 	 * use of ambient capabilities for them to be able to survive an
1096 	 * execve(2).
1097 	 */
1098 	if (caps && *elftype == ELFSTATIC && !ambient_caps) {
1099 		fprintf(stderr, "Can't run statically-linked binaries with "
1100 				"capabilities (-c) without also setting "
1101 				"ambient capabilities. Try passing "
1102 				"--ambient.\n");
1103 		exit(1);
1104 	}
1105 
1106 	return optind;
1107 }
1108