/* * Copyright (c) 1997-8,2007,11,19,20 Andrew G Morgan * * This file deals with getting and setting capabilities on processes. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include /* Obtain O_* constant definitions */ #include #include #include #include #include #include #include #include #include "libcap.h" /* * libcap uses this abstraction for all system calls that change * kernel managed capability state. This permits the user to redirect * it for testing and also to better implement posix semantics when * using pthreads. */ static long int _cap_syscall3(long int syscall_nr, long int arg1, long int arg2, long int arg3) { return syscall(syscall_nr, arg1, arg2, arg3); } static long int _cap_syscall6(long int syscall_nr, long int arg1, long int arg2, long int arg3, long int arg4, long int arg5, long int arg6) { return syscall(syscall_nr, arg1, arg2, arg3, arg4, arg5, arg6); } /* * to keep the structure of the code conceptually similar in C and Go * implementations, we introduce this abstraction for invoking state * writing system calls. In psx+pthreaded code, the fork * implementation provided by nptl ensures that we can consistently * use the multithreaded syscalls even in the child after a fork(). */ struct syscaller_s { long int (*three)(long int syscall_nr, long int arg1, long int arg2, long int arg3); long int (*six)(long int syscall_nr, long int arg1, long int arg2, long int arg3, long int arg4, long int arg5, long int arg6); }; /* use this syscaller for multi-threaded code */ static struct syscaller_s multithread = { .three = _cap_syscall3, .six = _cap_syscall6 }; /* use this syscaller for single-threaded code */ static struct syscaller_s singlethread = { .three = _cap_syscall3, .six = _cap_syscall6 }; /* * This gets reset to 0 if we are *not* linked with libpsx. */ static int _libcap_overrode_syscalls = 1; /* * psx_load_syscalls() is weakly defined so we can have it overridden * by libpsx if that library is linked. Specifically, when libcap * calls psx_load_sycalls() it is prepared to override the default * values for the syscalls that libcap uses to change security state. * As can be seen here this present function is mostly a * no-op. However, if libpsx is linked, the one present in that * library (not being weak) will replace this one and the * _libcap_overrode_syscalls value isn't forced to zero. * * Note: we hardcode the prototype for the psx_load_syscalls() * function here so the compiler isn't worried. If we force the build * to include the header, we are close to requiring the optional * libpsx to be linked. */ void psx_load_syscalls(long int (**syscall_fn)(long int, long int, long int, long int), long int (**syscall6_fn)(long int, long int, long int, long int, long int, long int, long int)); __attribute__((weak)) void psx_load_syscalls(long int (**syscall_fn)(long int, long int, long int, long int), long int (**syscall6_fn)(long int, long int, long int, long int, long int, long int, long int)) { _libcap_overrode_syscalls = 0; } /* * cap_set_syscall overrides the state setting syscalls that libcap does. * Generally, you don't need to call this manually: libcap tries hard to * set things up appropriately. */ void cap_set_syscall(long int (*new_syscall)(long int, long int, long int, long int), long int (*new_syscall6)(long int, long int, long int, long int, long int, long int, long int)) { if (new_syscall == NULL) { psx_load_syscalls(&multithread.three, &multithread.six); } else { multithread.three = new_syscall; multithread.six = new_syscall6; } } static int _libcap_capset(struct syscaller_s *sc, cap_user_header_t header, const cap_user_data_t data) { if (_libcap_overrode_syscalls) { return sc->three(SYS_capset, (long int) header, (long int) data, 0); } return capset(header, data); } static int _libcap_wprctl3(struct syscaller_s *sc, long int pr_cmd, long int arg1, long int arg2) { if (_libcap_overrode_syscalls) { return sc->three(SYS_prctl, pr_cmd, arg1, arg2); } return prctl(pr_cmd, arg1, arg2, 0, 0, 0); } static int _libcap_wprctl6(struct syscaller_s *sc, long int pr_cmd, long int arg1, long int arg2, long int arg3, long int arg4, long int arg5) { if (_libcap_overrode_syscalls) { return sc->six(SYS_prctl, pr_cmd, arg1, arg2, arg3, arg4, arg5); } return prctl(pr_cmd, arg1, arg2, arg3, arg4, arg5); } /* * cap_get_proc obtains the capability set for the current process. */ cap_t cap_get_proc(void) { cap_t result; /* allocate a new capability set */ result = cap_init(); if (result) { _cap_debug("getting current process' capabilities"); /* fill the capability sets via a system call */ if (capget(&result->head, &result->u[0].set)) { cap_free(result); result = NULL; } } return result; } static int _cap_set_proc(struct syscaller_s *sc, cap_t cap_d) { int retval; if (!good_cap_t(cap_d)) { errno = EINVAL; return -1; } _cap_debug("setting process capabilities"); retval = _libcap_capset(sc, &cap_d->head, &cap_d->u[0].set); return retval; } int cap_set_proc(cap_t cap_d) { return _cap_set_proc(&multithread, cap_d); } /* the following two functions are not required by POSIX */ /* read the caps on a specific process */ int capgetp(pid_t pid, cap_t cap_d) { int error; if (!good_cap_t(cap_d)) { errno = EINVAL; return -1; } _cap_debug("getting process capabilities for proc %d", pid); cap_d->head.pid = pid; error = capget(&cap_d->head, &cap_d->u[0].set); cap_d->head.pid = 0; return error; } /* allocate space for and return capabilities of target process */ cap_t cap_get_pid(pid_t pid) { cap_t result; result = cap_init(); if (result) { if (capgetp(pid, result) != 0) { int my_errno; my_errno = errno; cap_free(result); errno = my_errno; result = NULL; } } return result; } /* * set the caps on a specific process/pg etc.. The kernel has long * since deprecated this asynchronous interface. DON'T EXPECT THIS TO * EVER WORK AGAIN. */ int capsetp(pid_t pid, cap_t cap_d) { int error; if (!good_cap_t(cap_d)) { errno = EINVAL; return -1; } _cap_debug("setting process capabilities for proc %d", pid); cap_d->head.pid = pid; error = capset(&cap_d->head, &cap_d->u[0].set); cap_d->head.version = _LIBCAP_CAPABILITY_VERSION; cap_d->head.pid = 0; return error; } /* the kernel api requires unsigned long arguments */ #define pr_arg(x) ((unsigned long) x) /* get a capability from the bounding set */ int cap_get_bound(cap_value_t cap) { int result; result = prctl(PR_CAPBSET_READ, pr_arg(cap), pr_arg(0)); if (result < 0) { errno = -result; return -1; } return result; } static int _cap_drop_bound(struct syscaller_s *sc, cap_value_t cap) { int result; result = _libcap_wprctl3(sc, PR_CAPBSET_DROP, pr_arg(cap), pr_arg(0)); if (result < 0) { errno = -result; return -1; } return result; } /* drop a capability from the bounding set */ int cap_drop_bound(cap_value_t cap) { return _cap_drop_bound(&multithread, cap); } /* get a capability from the ambient set */ int cap_get_ambient(cap_value_t cap) { int result; result = prctl(PR_CAP_AMBIENT, pr_arg(PR_CAP_AMBIENT_IS_SET), pr_arg(cap), pr_arg(0), pr_arg(0)); if (result < 0) { errno = -result; return -1; } return result; } static int _cap_set_ambient(struct syscaller_s *sc, cap_value_t cap, cap_flag_value_t set) { int result, val; switch (set) { case CAP_SET: val = PR_CAP_AMBIENT_RAISE; break; case CAP_CLEAR: val = PR_CAP_AMBIENT_LOWER; break; default: errno = EINVAL; return -1; } result = _libcap_wprctl6(sc, PR_CAP_AMBIENT, pr_arg(val), pr_arg(cap), pr_arg(0), pr_arg(0), pr_arg(0)); if (result < 0) { errno = -result; return -1; } return result; } /* * cap_set_ambient modifies a single ambient capability value. */ int cap_set_ambient(cap_value_t cap, cap_flag_value_t set) { return _cap_set_ambient(&multithread, cap, set); } static int _cap_reset_ambient(struct syscaller_s *sc) { int olderrno = errno; cap_value_t c; int result = 0; for (c = 0; !result; c++) { result = cap_get_ambient(c); if (result == -1) { errno = olderrno; return 0; } } result = _libcap_wprctl6(sc, PR_CAP_AMBIENT, pr_arg(PR_CAP_AMBIENT_CLEAR_ALL), pr_arg(0), pr_arg(0), pr_arg(0), pr_arg(0)); if (result < 0) { errno = -result; return -1; } return result; } /* * cap_reset_ambient erases all ambient capabilities - this reads the * ambient caps before performing the erase to workaround the corner * case where the set is empty already but the ambient cap API is * locked. */ int cap_reset_ambient() { return _cap_reset_ambient(&multithread); } /* * Read the security mode of the current process. */ unsigned cap_get_secbits(void) { return (unsigned) prctl(PR_GET_SECUREBITS, pr_arg(0), pr_arg(0)); } static int _cap_set_secbits(struct syscaller_s *sc, unsigned bits) { return _libcap_wprctl3(sc, PR_SET_SECUREBITS, bits, 0); } /* * Set the secbits of the current process. */ int cap_set_secbits(unsigned bits) { return _cap_set_secbits(&multithread, bits); } /* * Attempt to raise the no new privs prctl value. */ static void _cap_set_no_new_privs(struct syscaller_s *sc) { (void) _libcap_wprctl6(sc, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); } /* * Some predefined constants */ #define CAP_SECURED_BITS_BASIC \ (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED | \ SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED | \ SECBIT_KEEP_CAPS_LOCKED) #define CAP_SECURED_BITS_AMBIENT (CAP_SECURED_BITS_BASIC | \ SECBIT_NO_CAP_AMBIENT_RAISE | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED) static cap_value_t raise_cap_setpcap[] = {CAP_SETPCAP}; static int _cap_set_mode(struct syscaller_s *sc, cap_mode_t flavor) { cap_t working = cap_get_proc(); unsigned secbits = CAP_SECURED_BITS_AMBIENT; int ret = cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_setpcap, CAP_SET); ret = ret | _cap_set_proc(sc, working); if (ret == 0) { cap_flag_t c; switch (flavor) { case CAP_MODE_NOPRIV: /* fall through */ case CAP_MODE_PURE1E_INIT: (void) cap_clear_flag(working, CAP_INHERITABLE); /* fall through */ case CAP_MODE_PURE1E: if (!CAP_AMBIENT_SUPPORTED()) { secbits = CAP_SECURED_BITS_BASIC; } else { ret = _cap_reset_ambient(sc); if (ret) { break; /* ambient dropping failed */ } } ret = _cap_set_secbits(sc, secbits); if (flavor != CAP_MODE_NOPRIV) { break; } /* just for "case CAP_MODE_NOPRIV:" */ for (c = 0; cap_get_bound(c) >= 0; c++) { (void) _cap_drop_bound(sc, c); } (void) cap_clear_flag(working, CAP_PERMITTED); /* for good measure */ _cap_set_no_new_privs(sc); break; default: errno = EINVAL; ret = -1; break; } } (void) cap_clear_flag(working, CAP_EFFECTIVE); ret = _cap_set_proc(sc, working) | ret; (void) cap_free(working); return ret; } /* * cap_set_mode locks the overarching capability framework of the * present process and thus its children to a predefined flavor. Once * set, these modes cannot be undone by the affected process tree and * can only be done by "cap_setpcap" permitted processes. Note, a side * effect of this function, whether it succeeds or fails, is to clear * at least the CAP_EFFECTIVE flags for the current process. */ int cap_set_mode(cap_mode_t flavor) { return _cap_set_mode(&multithread, flavor); } /* * cap_get_mode attempts to determine what the current capability mode * is. If it can find no match in the libcap pre-defined modes, it * returns CAP_MODE_UNCERTAIN. */ cap_mode_t cap_get_mode(void) { unsigned secbits = cap_get_secbits(); if ((secbits & CAP_SECURED_BITS_BASIC) != CAP_SECURED_BITS_BASIC) { return CAP_MODE_UNCERTAIN; } /* validate ambient is not set */ int olderrno = errno; int ret = 0; cap_value_t c; for (c = 0; !ret; c++) { ret = cap_get_ambient(c); if (ret == -1) { errno = olderrno; if (c && secbits != CAP_SECURED_BITS_AMBIENT) { return CAP_MODE_UNCERTAIN; } break; } if (ret) { return CAP_MODE_UNCERTAIN; } } cap_t working = cap_get_proc(); cap_t empty = cap_init(); int cf = cap_compare(empty, working); cap_free(empty); cap_free(working); if (CAP_DIFFERS(cf, CAP_INHERITABLE)) { return CAP_MODE_PURE1E; } if (CAP_DIFFERS(cf, CAP_PERMITTED) || CAP_DIFFERS(cf, CAP_EFFECTIVE)) { return CAP_MODE_PURE1E_INIT; } for (c = 0; ; c++) { int v = cap_get_bound(c); if (v == -1) { break; } if (v) { return CAP_MODE_PURE1E_INIT; } } return CAP_MODE_NOPRIV; } static int _cap_setuid(struct syscaller_s *sc, uid_t uid) { const cap_value_t raise_cap_setuid[] = {CAP_SETUID}; cap_t working = cap_get_proc(); (void) cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_setuid, CAP_SET); /* * Note, we are cognizant of not using glibc's setuid in the case * that we've modified the way libcap is doing setting * syscalls. This is because prctl needs to be working in a POSIX * compliant way for the code below to work, so we are either * all-broken or not-broken and don't allow for "sort of working". */ (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 1, 0); int ret = _cap_set_proc(sc, working); if (ret == 0) { if (_libcap_overrode_syscalls) { ret = sc->three(SYS_setuid, (long int) uid, 0, 0); if (ret < 0) { errno = -ret; ret = -1; } } else { ret = setuid(uid); } } int olderrno = errno; (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 0, 0); (void) cap_clear_flag(working, CAP_EFFECTIVE); (void) _cap_set_proc(sc, working); (void) cap_free(working); errno = olderrno; return ret; } /* * cap_setuid attempts to set the uid of the process without dropping * any permitted capabilities in the process. A side effect of a call * to this function is that the effective set will be cleared by the * time the function returns. */ int cap_setuid(uid_t uid) { return _cap_setuid(&multithread, uid); } #if defined(__arm__) || defined(__i386__) || \ defined(__i486__) || defined(__i586__) || defined(__i686__) #define sys_setgroups_variant SYS_setgroups32 #else #define sys_setgroups_variant SYS_setgroups #endif static int _cap_setgroups(struct syscaller_s *sc, gid_t gid, size_t ngroups, const gid_t groups[]) { const cap_value_t raise_cap_setgid[] = {CAP_SETGID}; cap_t working = cap_get_proc(); (void) cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_setgid, CAP_SET); /* * Note, we are cognizant of not using glibc's setgid etc in the * case that we've modified the way libcap is doing setting * syscalls. This is because prctl needs to be working in a POSIX * compliant way for the other functions of this file so we are * all-broken or not-broken and don't allow for "sort of working". */ int ret = _cap_set_proc(sc, working); if (_libcap_overrode_syscalls) { if (ret == 0) { ret = sc->three(SYS_setgid, (long int) gid, 0, 0); } if (ret == 0) { ret = sc->three(sys_setgroups_variant, (long int) ngroups, (long int) groups, 0); } if (ret < 0) { errno = -ret; ret = -1; } } else { if (ret == 0) { ret = setgid(gid); } if (ret == 0) { ret = setgroups(ngroups, groups); } } int olderrno = errno; (void) cap_clear_flag(working, CAP_EFFECTIVE); (void) _cap_set_proc(sc, working); (void) cap_free(working); errno = olderrno; return ret; } /* * cap_setgroups combines setting the gid with changing the set of * supplemental groups for a user into one call that raises the needed * capabilities to do it for the duration of the call. A side effect * of a call to this function is that the effective set will be * cleared by the time the function returns. */ int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[]) { return _cap_setgroups(&multithread, gid, ngroups, groups); } /* * cap_iab_get_proc returns a cap_iab_t value initialized by the * current process state related to these iab bits. */ cap_iab_t cap_iab_get_proc(void) { cap_iab_t iab = cap_iab_init(); cap_t current = cap_get_proc(); cap_iab_fill(iab, CAP_IAB_INH, current, CAP_INHERITABLE); cap_value_t c; for (c = cap_max_bits(); c; ) { --c; int o = c >> 5; __u32 mask = 1U << (c & 31); if (cap_get_bound(c) == 0) { iab->nb[o] |= mask; } if (cap_get_ambient(c) == 1) { iab->a[o] |= mask; } } return iab; } /* * _cap_iab_set_proc sets the iab collection using the requested syscaller. */ static int _cap_iab_set_proc(struct syscaller_s *sc, cap_iab_t iab) { int ret, i; cap_t working, temp = cap_get_proc(); cap_value_t c; int raising = 0; for (i = 0; i < _LIBCAP_CAPABILITY_U32S; i++) { __u32 newI = iab->i[i]; __u32 oldIP = temp->u[i].flat[CAP_INHERITABLE] | temp->u[i].flat[CAP_PERMITTED]; raising |= (newI & ~oldIP) | iab->a[i] | iab->nb[i]; temp->u[i].flat[CAP_INHERITABLE] = newI; } working = cap_dup(temp); if (raising) { ret = cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_setpcap, CAP_SET); if (ret) { goto defer; } } if ((ret = _cap_set_proc(sc, working))) { goto defer; } if ((ret = _cap_reset_ambient(sc))) { goto done; } for (c = cap_max_bits(); c-- != 0; ) { unsigned offset = c >> 5; __u32 mask = 1U << (c & 31); if (iab->a[offset] & mask) { ret = _cap_set_ambient(sc, c, CAP_SET); if (ret) { goto done; } } if (iab->nb[offset] & mask) { /* drop the bounding bit */ ret = _cap_drop_bound(sc, c); if (ret) { goto done; } } } done: (void) cap_set_proc(temp); defer: cap_free(working); cap_free(temp); return ret; } /* * cap_iab_set_proc sets the iab capability vectors of the current * process. */ int cap_iab_set_proc(cap_iab_t iab) { return _cap_iab_set_proc(&multithread, iab); } /* * cap_launcher_callback primes the launcher with a callback that will * be invoked after the fork() but before any privilege has changed * and before the execve(). This can be used to augment the state of * the child process within the cap_launch() process. You can cancel * any callback associated with a launcher by calling this function * with a callback_fn value NULL. * * If the callback function returns anything other than 0, it is * considered to have failed and the launch will be aborted - further, * errno will be communicated to the parent. */ void cap_launcher_callback(cap_launch_t attr, int (callback_fn)(void *detail)) { attr->custom_setup_fn = callback_fn; } /* * cap_launcher_setuid primes the launcher to attempt a change of uid. */ void cap_launcher_setuid(cap_launch_t attr, uid_t uid) { attr->uid = uid; attr->change_uids = 1; } /* * cap_launcher_setgroups primes the launcher to attempt a change of * gid and groups. */ void cap_launcher_setgroups(cap_launch_t attr, gid_t gid, int ngroups, const gid_t *groups) { attr->gid = gid; attr->ngroups = ngroups; attr->groups = groups; attr->change_gids = 1; } /* * cap_launcher_set_mode primes the launcher to attempt a change of * mode. */ void cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor) { attr->mode = flavor; attr->change_mode = 1; } /* * cap_launcher_set_iab primes the launcher to attempt to change the iab bits of * the launched child. */ cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab) { cap_iab_t old = attr->iab; attr->iab = iab; return old; } /* * cap_launcher_set_chroot sets the intended chroot for the launched * child. */ void cap_launcher_set_chroot(cap_launch_t attr, const char *chroot) { attr->chroot = _libcap_strdup(chroot); } static int _cap_chroot(struct syscaller_s *sc, const char *root) { const cap_value_t raise_cap_sys_chroot[] = {CAP_SYS_CHROOT}; cap_t working = cap_get_proc(); (void) cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_sys_chroot, CAP_SET); int ret = _cap_set_proc(sc, working); if (ret == 0) { if (_libcap_overrode_syscalls) { ret = sc->three(SYS_chroot, (long int) root, 0, 0); if (ret < 0) { errno = -ret; ret = -1; } } else { ret = chroot(root); } } int olderrno = errno; (void) cap_clear_flag(working, CAP_EFFECTIVE); (void) _cap_set_proc(sc, working); (void) cap_free(working); errno = olderrno; return ret; } /* * _cap_launch is invoked in the forked child, it cannot return but is * required to exit. If the execve fails, it will write the errno value * over the filedescriptor, fd, and exit with status 0. */ __attribute__ ((noreturn)) static void _cap_launch(int fd, cap_launch_t attr, void *detail) { struct syscaller_s *sc = &singlethread; if (attr->custom_setup_fn && attr->custom_setup_fn(detail)) { goto defer; } if (attr->change_uids && _cap_setuid(sc, attr->uid)) { goto defer; } if (attr->change_gids && _cap_setgroups(sc, attr->gid, attr->ngroups, attr->groups)) { goto defer; } if (attr->change_mode && _cap_set_mode(sc, attr->mode)) { goto defer; } if (attr->iab && _cap_iab_set_proc(sc, attr->iab)) { goto defer; } if (attr->chroot != NULL && _cap_chroot(sc, attr->chroot)) { goto defer; } /* * Some type wrangling to work around what the kernel API really * means: not "const char **". */ const void *temp_args = attr->argv; const void *temp_envp = attr->envp; execve(attr->arg0, temp_args, temp_envp); /* if the exec worked, execution will not reach here */ defer: /* * getting here means an error has occurred and errno is * communicated to the parent */ for (;;) { int n = write(fd, &errno, sizeof(errno)); if (n < 0 && errno == EAGAIN) { continue; } break; } close(fd); exit(1); } /* * cap_launch performs a wrapped fork+exec that works in both an * unthreaded environment and also where libcap is linked with * psx+pthreads. The function supports dropping privilege in the * forked thread, but retaining privilege in the parent thread(s). * * Since the ambient set is fragile with respect to changes in I or P, * the function carefully orders setting of these inheritable * characteristics, to make sure they stick, or return an error * of -1 setting errno because the launch failed. */ pid_t cap_launch(cap_launch_t attr, void *data) { int my_errno; int ps[2]; if (pipe2(ps, O_CLOEXEC) != 0) { return -1; } int child = fork(); my_errno = errno; close(ps[1]); if (child < 0) { goto defer; } if (!child) { close(ps[0]); /* noreturn from this function: */ _cap_launch(ps[1], attr, data); } /* * Extend this function's return codes to include setup failures * in the child. */ for (;;) { int ignored; int n = read(ps[0], &my_errno, sizeof(my_errno)); if (n == 0) { goto defer; } if (n < 0 && errno == EAGAIN) { continue; } waitpid(child, &ignored, 0); child = -1; my_errno = ECHILD; break; } defer: close(ps[0]); errno = my_errno; return (pid_t) child; }