• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Authors: Gregory P. Smith & Jeffrey Yasskin */
2 #include "Python.h"
3 #include "pycore_fileutils.h"
4 #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
5 # define _GNU_SOURCE
6 #endif
7 #include <unistd.h>
8 #include <fcntl.h>
9 #ifdef HAVE_SYS_TYPES_H
10 #include <sys/types.h>
11 #endif
12 #if defined(HAVE_SYS_STAT_H)
13 #include <sys/stat.h>
14 #endif
15 #ifdef HAVE_SYS_SYSCALL_H
16 #include <sys/syscall.h>
17 #endif
18 #if defined(HAVE_SYS_RESOURCE_H)
19 #include <sys/resource.h>
20 #endif
21 #ifdef HAVE_DIRENT_H
22 #include <dirent.h>
23 #endif
24 #ifdef HAVE_GRP_H
25 #include <grp.h>
26 #endif /* HAVE_GRP_H */
27 
28 #include "posixmodule.h"
29 
30 #ifdef _Py_MEMORY_SANITIZER
31 # include <sanitizer/msan_interface.h>
32 #endif
33 
34 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
35 # include <sys/linux-syscalls.h>
36 # define SYS_getdents64  __NR_getdents64
37 #endif
38 
39 #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
40     defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
41 /* If this is ever expanded to non-Linux platforms, verify what calls are
42  * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
43 # include <signal.h>
44 # define VFORK_USABLE 1
45 #endif
46 
47 #if defined(__sun) && defined(__SVR4)
48 /* readdir64 is used to work around Solaris 9 bug 6395699. */
49 # define readdir readdir64
50 # define dirent dirent64
51 # if !defined(HAVE_DIRFD)
52 /* Some versions of Solaris lack dirfd(). */
53 #  define dirfd(dirp) ((dirp)->dd_fd)
54 #  define HAVE_DIRFD
55 # endif
56 #endif
57 
58 #if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
59 # define FD_DIR "/dev/fd"
60 #else
61 # define FD_DIR "/proc/self/fd"
62 #endif
63 
64 #ifdef NGROUPS_MAX
65 #define MAX_GROUPS NGROUPS_MAX
66 #else
67 #define MAX_GROUPS 64
68 #endif
69 
70 #define POSIX_CALL(call)   do { if ((call) == -1) goto error; } while (0)
71 
72 static struct PyModuleDef _posixsubprocessmodule;
73 
/* Convert an ASCII decimal string to a non-negative int without calling
 * into libc, so it can be used between fork() and exec().
 *
 * Returns the parsed value on success.  Returns -1 if a non-digit character
 * is encountered or if the value does not fit in an int.  An empty string
 * parses as 0.
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Check before multiplying: signed overflow is undefined behavior,
         * so an over-long digit string must be rejected up front. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
87 
88 
89 #if defined(__FreeBSD__) || defined(__DragonFly__)
90 /* When /dev/fd isn't mounted it is often a static directory populated
91  * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
92  * NetBSD and OpenBSD have a /proc fs available (though not necessarily
93  * mounted) and do not have fdescfs for /dev/fd.  MacOS X has a devfs
94  * that properly supports /dev/fd.
95  */
96 static int
_is_fdescfs_mounted_on_dev_fd(void)97 _is_fdescfs_mounted_on_dev_fd(void)
98 {
99     struct stat dev_stat;
100     struct stat dev_fd_stat;
101     if (stat("/dev", &dev_stat) != 0)
102         return 0;
103     if (stat(FD_DIR, &dev_fd_stat) != 0)
104         return 0;
105     if (dev_stat.st_dev == dev_fd_stat.st_dev)
106         return 0;  /* / == /dev == /dev/fd means it is static. #fail */
107     return 1;
108 }
109 #endif
110 
111 
112 /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
113 static int
_sanity_check_python_fd_sequence(PyObject * fd_sequence)114 _sanity_check_python_fd_sequence(PyObject *fd_sequence)
115 {
116     Py_ssize_t seq_idx;
117     long prev_fd = -1;
118     for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
119         PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
120         long iter_fd;
121         if (!PyLong_Check(py_fd)) {
122             return 1;
123         }
124         iter_fd = PyLong_AsLong(py_fd);
125         if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
126             /* Negative, overflow, unsorted, too big for a fd. */
127             return 1;
128         }
129         prev_fd = iter_fd;
130     }
131     return 0;
132 }
133 
134 
135 /* Is fd found in the sorted Python Sequence? */
136 static int
_is_fd_in_sorted_fd_sequence(int fd,PyObject * fd_sequence)137 _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
138 {
139     /* Binary search. */
140     Py_ssize_t search_min = 0;
141     Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
142     if (search_max < 0)
143         return 0;
144     do {
145         long middle = (search_min + search_max) / 2;
146         long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
147         if (fd == middle_fd)
148             return 1;
149         if (fd > middle_fd)
150             search_min = middle + 1;
151         else
152             search_max = middle - 1;
153     } while (search_min <= search_max);
154     return 0;
155 }
156 
157 static int
make_inheritable(PyObject * py_fds_to_keep,int errpipe_write)158 make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
159 {
160     Py_ssize_t i, len;
161 
162     len = PyTuple_GET_SIZE(py_fds_to_keep);
163     for (i = 0; i < len; ++i) {
164         PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
165         long fd = PyLong_AsLong(fdobj);
166         assert(!PyErr_Occurred());
167         assert(0 <= fd && fd <= INT_MAX);
168         if (fd == errpipe_write) {
169             /* errpipe_write is part of py_fds_to_keep. It must be closed at
170                exec(), but kept open in the child process until exec() is
171                called. */
172             continue;
173         }
174         if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0)
175             return -1;
176     }
177     return 0;
178 }
179 
180 
/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 *
 * Each platform-specific probe returns immediately when it succeeds;
 * if none does, 256 is returned.
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    {
        long netbsd_max = fcntl(0, F_MAXFD);
        if (netbsd_max >= 0) {
            return netbsd_max;
        }
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    {
        struct rlimit limits;
        /* Not on the POSIX async signal safe functions list but likely
         * safe.  TODO - Someone should audit OpenBSD to make sure. */
        if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
            return (long) limits.rlim_max;
        }
    }
#endif
#ifdef _SC_OPEN_MAX
    {
        long sysconf_max = sysconf(_SC_OPEN_MAX);
        if (sysconf_max != -1) {
            return sysconf_max;
        }
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
207 
208 
209 /* Close all file descriptors in the range from start_fd and higher
210  * except for those in py_fds_to_keep.  If the range defined by
211  * [start_fd, safe_get_max_fd()) is large this will take a long
212  * time as it calls close() on EVERY possible fd.
213  *
214  * It isn't possible to know for sure what the max fd to go up to
215  * is for processes with the capability of raising their maximum.
216  */
217 static void
_close_fds_by_brute_force(long start_fd,PyObject * py_fds_to_keep)218 _close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
219 {
220     long end_fd = safe_get_max_fd();
221     Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
222     Py_ssize_t keep_seq_idx;
223     /* As py_fds_to_keep is sorted we can loop through the list closing
224      * fds in between any in the keep list falling within our range. */
225     for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
226         PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx);
227         int keep_fd = PyLong_AsLong(py_keep_fd);
228         if (keep_fd < start_fd)
229             continue;
230         _Py_closerange(start_fd, keep_fd - 1);
231         start_fd = keep_fd + 1;
232     }
233     if (start_fd <= end_fd) {
234         _Py_closerange(start_fd, end_fd);
235     }
236 }
237 
238 
239 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout used when reading FD_DIR with the getdents64 system call.
 *
 * It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long          d_off;
    unsigned short     d_reclen;    /* Length of this linux_dirent */
    unsigned char      d_type;
    char               d_name[256]; /* Filename (null-terminated) */
};
253 
254 /* Close all open file descriptors in the range from start_fd and higher
255  * Do not close any in the sorted py_fds_to_keep list.
256  *
257  * This version is async signal safe as it does not make any unsafe C library
258  * calls, malloc calls or handle any locks.  It is _unfortunate_ to be forced
259  * to resort to making a kernel system call directly but this is the ONLY api
260  * available that does no harm.  opendir/readdir/closedir perform memory
261  * allocation and locking so while they usually work they are not guaranteed
262  * to (especially if you have replaced your malloc implementation).  A version
263  * of this function that uses those can be found in the _maybe_unsafe variant.
264  *
265  * This is Linux specific because that is all I am ready to test it on.  It
266  * should be easy to add OS specific dirent or dirent64 structures and modify
267  * it with some cpp #define magic to work on other OSes as well if you want.
268  */
269 static void
_close_open_fds_safe(int start_fd,PyObject * py_fds_to_keep)270 _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
271 {
272     int fd_dir_fd;
273 
274     fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
275     if (fd_dir_fd == -1) {
276         /* No way to get a list of open fds. */
277         _close_fds_by_brute_force(start_fd, py_fds_to_keep);
278         return;
279     } else {
280         char buffer[sizeof(struct linux_dirent64)];
281         int bytes;
282         while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
283                                 (struct linux_dirent64 *)buffer,
284                                 sizeof(buffer))) > 0) {
285             struct linux_dirent64 *entry;
286             int offset;
287 #ifdef _Py_MEMORY_SANITIZER
288             __msan_unpoison(buffer, bytes);
289 #endif
290             for (offset = 0; offset < bytes; offset += entry->d_reclen) {
291                 int fd;
292                 entry = (struct linux_dirent64 *)(buffer + offset);
293                 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
294                     continue;  /* Not a number. */
295                 if (fd != fd_dir_fd && fd >= start_fd &&
296                     !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
297                     close(fd);
298                 }
299             }
300         }
301         close(fd_dir_fd);
302     }
303 }
304 
305 #define _close_open_fds _close_open_fds_safe
306 
307 #else  /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
308 
309 
310 /* Close all open file descriptors from start_fd and higher.
311  * Do not close any in the sorted py_fds_to_keep tuple.
312  *
313  * This function violates the strict use of async signal safe functions. :(
314  * It calls opendir(), readdir() and closedir().  Of these, the one most
315  * likely to ever cause a problem is opendir() as it performs an internal
316  * malloc().  Practically this should not be a problem.  The Java VM makes the
317  * same calls between fork and exec in its own UNIXProcess_md.c implementation.
318  *
319  * readdir_r() is not used because it provides no benefit.  It is typically
320  * implemented as readdir() followed by memcpy().  See also:
321  *   http://womble.decadent.org.uk/readdir_r-advisory.html
322  */
323 static void
_close_open_fds_maybe_unsafe(long start_fd,PyObject * py_fds_to_keep)324 _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
325 {
326     DIR *proc_fd_dir;
327 #ifndef HAVE_DIRFD
328     while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
329         ++start_fd;
330     }
331     /* Close our lowest fd before we call opendir so that it is likely to
332      * reuse that fd otherwise we might close opendir's file descriptor in
333      * our loop.  This trick assumes that fd's are allocated on a lowest
334      * available basis. */
335     close(start_fd);
336     ++start_fd;
337 #endif
338 
339 #if defined(__FreeBSD__) || defined(__DragonFly__)
340     if (!_is_fdescfs_mounted_on_dev_fd())
341         proc_fd_dir = NULL;
342     else
343 #endif
344         proc_fd_dir = opendir(FD_DIR);
345     if (!proc_fd_dir) {
346         /* No way to get a list of open fds. */
347         _close_fds_by_brute_force(start_fd, py_fds_to_keep);
348     } else {
349         struct dirent *dir_entry;
350 #ifdef HAVE_DIRFD
351         int fd_used_by_opendir = dirfd(proc_fd_dir);
352 #else
353         int fd_used_by_opendir = start_fd - 1;
354 #endif
355         errno = 0;
356         while ((dir_entry = readdir(proc_fd_dir))) {
357             int fd;
358             if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
359                 continue;  /* Not a number. */
360             if (fd != fd_used_by_opendir && fd >= start_fd &&
361                 !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
362                 close(fd);
363             }
364             errno = 0;
365         }
366         if (errno) {
367             /* readdir error, revert behavior. Highly Unlikely. */
368             _close_fds_by_brute_force(start_fd, py_fds_to_keep);
369         }
370         closedir(proc_fd_dir);
371     }
372 }
373 
374 #define _close_open_fds _close_open_fds_maybe_unsafe
375 
376 #endif  /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
377 
378 
379 #ifdef VFORK_USABLE
380 /* Reset dispositions for all signals to SIG_DFL except for ignored
381  * signals. This way we ensure that no signal handlers can run
382  * after we unblock signals in a child created by vfork().
383  */
384 static void
reset_signal_handlers(const sigset_t * child_sigmask)385 reset_signal_handlers(const sigset_t *child_sigmask)
386 {
387     struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
388     for (int sig = 1; sig < _NSIG; sig++) {
389         /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
390         if (sig == SIGKILL || sig == SIGSTOP) {
391             continue;
392         }
393 
394         /* There is no need to reset the disposition of signals that will
395          * remain blocked across execve() since the kernel will do it. */
396         if (sigismember(child_sigmask, sig) == 1) {
397             continue;
398         }
399 
400         struct sigaction sa;
401         /* C libraries usually return EINVAL for signals used
402          * internally (e.g. for thread cancellation), so simply
403          * skip errors here. */
404         if (sigaction(sig, NULL, &sa) == -1) {
405             continue;
406         }
407 
408         /* void *h works as these fields are both pointer types already. */
409         void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
410                                               (void *)sa.sa_handler);
411         if (h == SIG_IGN || h == SIG_DFL) {
412             continue;
413         }
414 
415         /* This call can't reasonably fail, but if it does, terminating
416          * the child seems to be too harsh, so ignore errors. */
417         (void) sigaction(sig, &sa_dfl, NULL);
418     }
419 }
420 #endif /* VFORK_USABLE */
421 
422 
423 /*
424  * This function is code executed in the child process immediately after
425  * (v)fork to set things up and call exec().
426  *
427  * All of the code in this function must only use async-signal-safe functions,
428  * listed at `man 7 signal` or
429  * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
430  *
431  * This restriction is documented at
432  * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
433  *
434  * If this function is called after vfork(), even more care must be taken.
435  * The lack of preparations that C libraries normally take on fork(),
436  * as well as sharing the address space with the parent, might make even
437  * async-signal-safe functions vfork-unsafe. In particular, on Linux,
438  * set*id() and setgroups() library functions must not be called, since
439  * they have to interact with the library-level thread list and send
440  * library-internal signals to implement per-process credentials semantics
441  * required by POSIX but not supported natively on Linux. Another reason to
442  * avoid this family of functions is that sharing an address space between
443  * processes running with different privileges is inherently insecure.
444  * See bpo-35823 for further discussion and references.
445  *
446  * In some C libraries, setrlimit() has the same thread list/signalling
447  * behavior since resource limits were per-thread attributes before
448  * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
449  * (https://www.openwall.com/lists/musl/2020/10/15/6).
450  *
451  * If vfork-unsafe functionality is desired after vfork(), consider using
452  * syscall() to obtain it.
453  */
454 _Py_NO_INLINE static void
child_exec(char * const exec_array[],char * const argv[],char * const envp[],const char * cwd,int p2cread,int p2cwrite,int c2pread,int c2pwrite,int errread,int errwrite,int errpipe_read,int errpipe_write,int close_fds,int restore_signals,int call_setsid,int call_setgid,gid_t gid,int call_setgroups,size_t groups_size,const gid_t * groups,int call_setuid,uid_t uid,int child_umask,const void * child_sigmask,PyObject * py_fds_to_keep,PyObject * preexec_fn,PyObject * preexec_fn_args_tuple)455 child_exec(char *const exec_array[],
456            char *const argv[],
457            char *const envp[],
458            const char *cwd,
459            int p2cread, int p2cwrite,
460            int c2pread, int c2pwrite,
461            int errread, int errwrite,
462            int errpipe_read, int errpipe_write,
463            int close_fds, int restore_signals,
464            int call_setsid,
465            int call_setgid, gid_t gid,
466            int call_setgroups, size_t groups_size, const gid_t *groups,
467            int call_setuid, uid_t uid, int child_umask,
468            const void *child_sigmask,
469            PyObject *py_fds_to_keep,
470            PyObject *preexec_fn,
471            PyObject *preexec_fn_args_tuple)
472 {
473     int i, saved_errno, reached_preexec = 0;
474     PyObject *result;
475     const char* err_msg = "";
476     /* Buffer large enough to hold a hex integer.  We can't malloc. */
477     char hex_errno[sizeof(saved_errno)*2+1];
478 
479     if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
480         goto error;
481 
482     /* Close parent's pipe ends. */
483     if (p2cwrite != -1)
484         POSIX_CALL(close(p2cwrite));
485     if (c2pread != -1)
486         POSIX_CALL(close(c2pread));
487     if (errread != -1)
488         POSIX_CALL(close(errread));
489     POSIX_CALL(close(errpipe_read));
490 
491     /* When duping fds, if there arises a situation where one of the fds is
492        either 0, 1 or 2, it is possible that it is overwritten (#12607). */
493     if (c2pwrite == 0) {
494         POSIX_CALL(c2pwrite = dup(c2pwrite));
495         /* issue32270 */
496         if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
497             goto error;
498         }
499     }
500     while (errwrite == 0 || errwrite == 1) {
501         POSIX_CALL(errwrite = dup(errwrite));
502         /* issue32270 */
503         if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
504             goto error;
505         }
506     }
507 
508     /* Dup fds for child.
509        dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
510        would be a no-op (issue #10806). */
511     if (p2cread == 0) {
512         if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
513             goto error;
514     }
515     else if (p2cread != -1)
516         POSIX_CALL(dup2(p2cread, 0));  /* stdin */
517 
518     if (c2pwrite == 1) {
519         if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
520             goto error;
521     }
522     else if (c2pwrite != -1)
523         POSIX_CALL(dup2(c2pwrite, 1));  /* stdout */
524 
525     if (errwrite == 2) {
526         if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
527             goto error;
528     }
529     else if (errwrite != -1)
530         POSIX_CALL(dup2(errwrite, 2));  /* stderr */
531 
532     /* We no longer manually close p2cread, c2pwrite, and errwrite here as
533      * _close_open_fds takes care when it is not already non-inheritable. */
534 
535     if (cwd)
536         POSIX_CALL(chdir(cwd));
537 
538     if (child_umask >= 0)
539         umask(child_umask);  /* umask() always succeeds. */
540 
541     if (restore_signals)
542         _Py_RestoreSignals();
543 
544 #ifdef VFORK_USABLE
545     if (child_sigmask) {
546         reset_signal_handlers(child_sigmask);
547         if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
548             goto error;
549         }
550     }
551 #endif
552 
553 #ifdef HAVE_SETSID
554     if (call_setsid)
555         POSIX_CALL(setsid());
556 #endif
557 
558 #ifdef HAVE_SETGROUPS
559     if (call_setgroups)
560         POSIX_CALL(setgroups(groups_size, groups));
561 #endif /* HAVE_SETGROUPS */
562 
563 #ifdef HAVE_SETREGID
564     if (call_setgid)
565         POSIX_CALL(setregid(gid, gid));
566 #endif /* HAVE_SETREGID */
567 
568 #ifdef HAVE_SETREUID
569     if (call_setuid)
570         POSIX_CALL(setreuid(uid, uid));
571 #endif /* HAVE_SETREUID */
572 
573 
574     reached_preexec = 1;
575     if (preexec_fn != Py_None && preexec_fn_args_tuple) {
576         /* This is where the user has asked us to deadlock their program. */
577         result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
578         if (result == NULL) {
579             /* Stringifying the exception or traceback would involve
580              * memory allocation and thus potential for deadlock.
581              * We've already faced potential deadlock by calling back
582              * into Python in the first place, so it probably doesn't
583              * matter but we avoid it to minimize the possibility. */
584             err_msg = "Exception occurred in preexec_fn.";
585             errno = 0;  /* We don't want to report an OSError. */
586             goto error;
587         }
588         /* Py_DECREF(result); - We're about to exec so why bother? */
589     }
590 
591     /* close FDs after executing preexec_fn, which might open FDs */
592     if (close_fds) {
593         /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
594         _close_open_fds(3, py_fds_to_keep);
595     }
596 
597     /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
598     /* given the executable_list generated by Lib/subprocess.py.     */
599     saved_errno = 0;
600     for (i = 0; exec_array[i] != NULL; ++i) {
601         const char *executable = exec_array[i];
602         if (envp) {
603             execve(executable, argv, envp);
604         } else {
605             execv(executable, argv);
606         }
607         if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
608             saved_errno = errno;
609         }
610     }
611     /* Report the first exec error, not the last. */
612     if (saved_errno)
613         errno = saved_errno;
614 
615 error:
616     saved_errno = errno;
617     /* Report the posix error to our parent process. */
618     /* We ignore all write() return values as the total size of our writes is
619        less than PIPEBUF and we cannot do anything about an error anyways.
620        Use _Py_write_noraise() to retry write() if it is interrupted by a
621        signal (fails with EINTR). */
622     if (saved_errno) {
623         char *cur;
624         _Py_write_noraise(errpipe_write, "OSError:", 8);
625         cur = hex_errno + sizeof(hex_errno);
626         while (saved_errno != 0 && cur != hex_errno) {
627             *--cur = Py_hexdigits[saved_errno % 16];
628             saved_errno /= 16;
629         }
630         _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
631         _Py_write_noraise(errpipe_write, ":", 1);
632         if (!reached_preexec) {
633             /* Indicate to the parent that the error happened before exec(). */
634             _Py_write_noraise(errpipe_write, "noexec", 6);
635         }
636         /* We can't call strerror(saved_errno).  It is not async signal safe.
637          * The parent process will look the error message up. */
638     } else {
639         _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
640         _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
641     }
642 }
643 
644 
645 /* The main purpose of this wrapper function is to isolate vfork() from both
646  * subprocess_fork_exec() and child_exec(). A child process created via
647  * vfork() executes on the same stack as the parent process while the latter is
648  * suspended, so this function should not be inlined to avoid compiler bugs
649  * that might clobber data needed by the parent later. Additionally,
650  * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
651  * GCC (see bpo-35823).
652  */
653 _Py_NO_INLINE static pid_t
do_fork_exec(char * const exec_array[],char * const argv[],char * const envp[],const char * cwd,int p2cread,int p2cwrite,int c2pread,int c2pwrite,int errread,int errwrite,int errpipe_read,int errpipe_write,int close_fds,int restore_signals,int call_setsid,int call_setgid,gid_t gid,int call_setgroups,size_t groups_size,const gid_t * groups,int call_setuid,uid_t uid,int child_umask,const void * child_sigmask,PyObject * py_fds_to_keep,PyObject * preexec_fn,PyObject * preexec_fn_args_tuple)654 do_fork_exec(char *const exec_array[],
655              char *const argv[],
656              char *const envp[],
657              const char *cwd,
658              int p2cread, int p2cwrite,
659              int c2pread, int c2pwrite,
660              int errread, int errwrite,
661              int errpipe_read, int errpipe_write,
662              int close_fds, int restore_signals,
663              int call_setsid,
664              int call_setgid, gid_t gid,
665              int call_setgroups, size_t groups_size, const gid_t *groups,
666              int call_setuid, uid_t uid, int child_umask,
667              const void *child_sigmask,
668              PyObject *py_fds_to_keep,
669              PyObject *preexec_fn,
670              PyObject *preexec_fn_args_tuple)
671 {
672 
673     pid_t pid;
674 
675 #ifdef VFORK_USABLE
676     if (child_sigmask) {
677         /* These are checked by our caller; verify them in debug builds. */
678         assert(!call_setuid);
679         assert(!call_setgid);
680         assert(!call_setgroups);
681         assert(preexec_fn == Py_None);
682 
683         pid = vfork();
684     } else
685 #endif
686     {
687         pid = fork();
688     }
689 
690     if (pid != 0) {
691         return pid;
692     }
693 
694     /* Child process.
695      * See the comment above child_exec() for restrictions imposed on
696      * the code below.
697      */
698 
699     if (preexec_fn != Py_None) {
700         /* We'll be calling back into Python later so we need to do this.
701          * This call may not be async-signal-safe but neither is calling
702          * back into Python.  The user asked us to use hope as a strategy
703          * to avoid deadlock... */
704         PyOS_AfterFork_Child();
705     }
706 
707     child_exec(exec_array, argv, envp, cwd,
708                p2cread, p2cwrite, c2pread, c2pwrite,
709                errread, errwrite, errpipe_read, errpipe_write,
710                close_fds, restore_signals, call_setsid,
711                call_setgid, gid, call_setgroups, groups_size, groups,
712                call_setuid, uid, child_umask, child_sigmask,
713                py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
714     _exit(255);
715     return 0;  /* Dead code to avoid a potential compiler warning. */
716 }
717 
718 
719 static PyObject *
subprocess_fork_exec(PyObject * module,PyObject * args)720 subprocess_fork_exec(PyObject *module, PyObject *args)
721 {
722     PyObject *gc_module = NULL;
723     PyObject *executable_list, *py_fds_to_keep;
724     PyObject *env_list, *preexec_fn;
725     PyObject *process_args, *converted_args = NULL, *fast_args = NULL;
726     PyObject *preexec_fn_args_tuple = NULL;
727     PyObject *groups_list;
728     PyObject *uid_object, *gid_object;
729     int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite;
730     int errpipe_read, errpipe_write, close_fds, restore_signals;
731     int call_setsid;
732     int call_setgid = 0, call_setgroups = 0, call_setuid = 0;
733     uid_t uid;
734     gid_t gid, *groups = NULL;
735     int child_umask;
736     PyObject *cwd_obj, *cwd_obj2 = NULL;
737     const char *cwd;
738     pid_t pid = -1;
739     int need_to_reenable_gc = 0;
740     char *const *exec_array, *const *argv = NULL, *const *envp = NULL;
741     Py_ssize_t arg_num, num_groups = 0;
742     int need_after_fork = 0;
743     int saved_errno = 0;
744 
745     if (!PyArg_ParseTuple(
746             args, "OOpO!OOiiiiiiiiiiOOOiO:fork_exec",
747             &process_args, &executable_list,
748             &close_fds, &PyTuple_Type, &py_fds_to_keep,
749             &cwd_obj, &env_list,
750             &p2cread, &p2cwrite, &c2pread, &c2pwrite,
751             &errread, &errwrite, &errpipe_read, &errpipe_write,
752             &restore_signals, &call_setsid,
753             &gid_object, &groups_list, &uid_object, &child_umask,
754             &preexec_fn))
755         return NULL;
756 
757     if ((preexec_fn != Py_None) &&
758             (PyInterpreterState_Get() != PyInterpreterState_Main())) {
759         PyErr_SetString(PyExc_RuntimeError,
760                         "preexec_fn not supported within subinterpreters");
761         return NULL;
762     }
763 
764     if (close_fds && errpipe_write < 3) {  /* precondition */
765         PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
766         return NULL;
767     }
768     if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
769         PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
770         return NULL;
771     }
772 
773     PyInterpreterState *interp = PyInterpreterState_Get();
774     const PyConfig *config = _PyInterpreterState_GetConfig(interp);
775     if (config->_isolated_interpreter) {
776         PyErr_SetString(PyExc_RuntimeError,
777                         "subprocess not supported for isolated subinterpreters");
778         return NULL;
779     }
780 
781     /* We need to call gc.disable() when we'll be calling preexec_fn */
782     if (preexec_fn != Py_None) {
783         need_to_reenable_gc = PyGC_Disable();
784     }
785 
786     exec_array = _PySequence_BytesToCharpArray(executable_list);
787     if (!exec_array)
788         goto cleanup;
789 
790     /* Convert args and env into appropriate arguments for exec() */
791     /* These conversions are done in the parent process to avoid allocating
792        or freeing memory in the child process. */
793     if (process_args != Py_None) {
794         Py_ssize_t num_args;
795         /* Equivalent to:  */
796         /*  tuple(PyUnicode_FSConverter(arg) for arg in process_args)  */
797         fast_args = PySequence_Fast(process_args, "argv must be a tuple");
798         if (fast_args == NULL)
799             goto cleanup;
800         num_args = PySequence_Fast_GET_SIZE(fast_args);
801         converted_args = PyTuple_New(num_args);
802         if (converted_args == NULL)
803             goto cleanup;
804         for (arg_num = 0; arg_num < num_args; ++arg_num) {
805             PyObject *borrowed_arg, *converted_arg;
806             if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
807                 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
808                 goto cleanup;
809             }
810             borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
811             if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
812                 goto cleanup;
813             PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
814         }
815 
816         argv = _PySequence_BytesToCharpArray(converted_args);
817         Py_CLEAR(converted_args);
818         Py_CLEAR(fast_args);
819         if (!argv)
820             goto cleanup;
821     }
822 
823     if (env_list != Py_None) {
824         envp = _PySequence_BytesToCharpArray(env_list);
825         if (!envp)
826             goto cleanup;
827     }
828 
829     if (cwd_obj != Py_None) {
830         if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
831             goto cleanup;
832         cwd = PyBytes_AsString(cwd_obj2);
833     } else {
834         cwd = NULL;
835     }
836 
837     if (groups_list != Py_None) {
838 #ifdef HAVE_SETGROUPS
839         Py_ssize_t i;
840         gid_t gid;
841 
842         if (!PyList_Check(groups_list)) {
843             PyErr_SetString(PyExc_TypeError,
844                     "setgroups argument must be a list");
845             goto cleanup;
846         }
847         num_groups = PySequence_Size(groups_list);
848 
849         if (num_groups < 0)
850             goto cleanup;
851 
852         if (num_groups > MAX_GROUPS) {
853             PyErr_SetString(PyExc_ValueError, "too many groups");
854             goto cleanup;
855         }
856 
857         if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) {
858             PyErr_SetString(PyExc_MemoryError,
859                     "failed to allocate memory for group list");
860             goto cleanup;
861         }
862 
863         for (i = 0; i < num_groups; i++) {
864             PyObject *elem;
865             elem = PySequence_GetItem(groups_list, i);
866             if (!elem)
867                 goto cleanup;
868             if (!PyLong_Check(elem)) {
869                 PyErr_SetString(PyExc_TypeError,
870                                 "groups must be integers");
871                 Py_DECREF(elem);
872                 goto cleanup;
873             } else {
874                 if (!_Py_Gid_Converter(elem, &gid)) {
875                     Py_DECREF(elem);
876                     PyErr_SetString(PyExc_ValueError, "invalid group id");
877                     goto cleanup;
878                 }
879                 groups[i] = gid;
880             }
881             Py_DECREF(elem);
882         }
883         call_setgroups = 1;
884 
885 #else /* HAVE_SETGROUPS */
886         PyErr_BadInternalCall();
887         goto cleanup;
888 #endif /* HAVE_SETGROUPS */
889     }
890 
891     if (gid_object != Py_None) {
892 #ifdef HAVE_SETREGID
893         if (!_Py_Gid_Converter(gid_object, &gid))
894             goto cleanup;
895 
896         call_setgid = 1;
897 
898 #else /* HAVE_SETREGID */
899         PyErr_BadInternalCall();
900         goto cleanup;
901 #endif /* HAVE_SETREUID */
902     }
903 
904     if (uid_object != Py_None) {
905 #ifdef HAVE_SETREUID
906         if (!_Py_Uid_Converter(uid_object, &uid))
907             goto cleanup;
908 
909         call_setuid = 1;
910 
911 #else /* HAVE_SETREUID */
912         PyErr_BadInternalCall();
913         goto cleanup;
914 #endif /* HAVE_SETREUID */
915     }
916 
917     /* This must be the last thing done before fork() because we do not
918      * want to call PyOS_BeforeFork() if there is any chance of another
919      * error leading to the cleanup: code without calling fork(). */
920     if (preexec_fn != Py_None) {
921         preexec_fn_args_tuple = PyTuple_New(0);
922         if (!preexec_fn_args_tuple)
923             goto cleanup;
924         PyOS_BeforeFork();
925         need_after_fork = 1;
926     }
927 
928     /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
929     const void *old_sigmask = NULL;
930 #ifdef VFORK_USABLE
931     /* Use vfork() only if it's safe. See the comment above child_exec(). */
932     sigset_t old_sigs;
933     if (preexec_fn == Py_None &&
934         !call_setuid && !call_setgid && !call_setgroups) {
935         /* Block all signals to ensure that no signal handlers are run in the
936          * child process while it shares memory with us. Note that signals
937          * used internally by C libraries won't be blocked by
938          * pthread_sigmask(), but signal handlers installed by C libraries
939          * normally service only signals originating from *within the process*,
940          * so it should be sufficient to consider any library function that
941          * might send such a signal to be vfork-unsafe and do not call it in
942          * the child.
943          */
944         sigset_t all_sigs;
945         sigfillset(&all_sigs);
946         if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
947             goto cleanup;
948         }
949         old_sigmask = &old_sigs;
950     }
951 #endif
952 
953     pid = do_fork_exec(exec_array, argv, envp, cwd,
954                        p2cread, p2cwrite, c2pread, c2pwrite,
955                        errread, errwrite, errpipe_read, errpipe_write,
956                        close_fds, restore_signals, call_setsid,
957                        call_setgid, gid, call_setgroups, num_groups, groups,
958                        call_setuid, uid, child_umask, old_sigmask,
959                        py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
960 
961     /* Parent (original) process */
962     if (pid == -1) {
963         /* Capture errno for the exception. */
964         saved_errno = errno;
965     }
966 
967 #ifdef VFORK_USABLE
968     if (old_sigmask) {
969         /* vfork() semantics guarantees that the parent is blocked
970          * until the child performs _exit() or execve(), so it is safe
971          * to unblock signals once we're here.
972          * Note that in environments where vfork() is implemented as fork(),
973          * such as QEMU user-mode emulation, the parent won't be blocked,
974          * but it won't share the address space with the child,
975          * so it's still safe to unblock the signals.
976          *
977          * We don't handle errors here because this call can't fail
978          * if valid arguments are given, and because there is no good
979          * way for the caller to deal with a failure to restore
980          * the thread signal mask. */
981         (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
982     }
983 #endif
984 
985     if (need_after_fork)
986         PyOS_AfterFork_Parent();
987 
988 cleanup:
989     if (saved_errno != 0) {
990         errno = saved_errno;
991         /* We can't call this above as PyOS_AfterFork_Parent() calls back
992          * into Python code which would see the unreturned error. */
993         PyErr_SetFromErrno(PyExc_OSError);
994     }
995 
996     Py_XDECREF(preexec_fn_args_tuple);
997     PyMem_RawFree(groups);
998     Py_XDECREF(cwd_obj2);
999     if (envp)
1000         _Py_FreeCharPArray(envp);
1001     Py_XDECREF(converted_args);
1002     Py_XDECREF(fast_args);
1003     if (argv)
1004         _Py_FreeCharPArray(argv);
1005     if (exec_array)
1006         _Py_FreeCharPArray(exec_array);
1007 
1008     if (need_to_reenable_gc) {
1009         PyGC_Enable();
1010     }
1011     Py_XDECREF(gc_module);
1012 
1013     return pid == -1 ? NULL : PyLong_FromPid(pid);
1014 }
1015 
1016 
1017 PyDoc_STRVAR(subprocess_fork_exec_doc,
1018 "fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\
1019           p2cread, p2cwrite, c2pread, c2pwrite,\n\
1020           errread, errwrite, errpipe_read, errpipe_write,\n\
1021           restore_signals, call_setsid,\n\
1022           gid, groups_list, uid,\n\
1023           preexec_fn)\n\
1024 \n\
1025 Forks a child process, closes parent file descriptors as appropriate in the\n\
1026 child and dups the few that are needed before calling exec() in the child\n\
1027 process.\n\
1028 \n\
1029 If close_fds is true, close file descriptors 3 and higher, except those listed\n\
1030 in the sorted tuple pass_fds.\n\
1031 \n\
1032 The preexec_fn, if supplied, will be called immediately before closing file\n\
1033 descriptors and exec.\n\
1034 WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\
1035          It may trigger infrequent, difficult to debug deadlocks.\n\
1036 \n\
1037 If an error occurs in the child process before the exec, it is\n\
1038 serialized and written to the errpipe_write fd per subprocess.py.\n\
1039 \n\
1040 Returns: the child process's PID.\n\
1041 \n\
1042 Raises: Only on an error in the parent process.\n\
1043 ");
1044 
1045 /* module level code ********************************************************/
1046 
1047 PyDoc_STRVAR(module_doc,
1048 "A POSIX helper for the subprocess module.");
1049 
1050 static PyMethodDef module_methods[] = {
1051     {"fork_exec", subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc},
1052     {NULL, NULL}  /* sentinel */
1053 };
1054 
1055 static PyModuleDef_Slot _posixsubprocess_slots[] = {
1056     {0, NULL}
1057 };
1058 
1059 static struct PyModuleDef _posixsubprocessmodule = {
1060         PyModuleDef_HEAD_INIT,
1061         .m_name = "_posixsubprocess",
1062         .m_doc = module_doc,
1063         .m_size = 0,
1064         .m_methods = module_methods,
1065         .m_slots = _posixsubprocess_slots,
1066 };
1067 
1068 PyMODINIT_FUNC
PyInit__posixsubprocess(void)1069 PyInit__posixsubprocess(void)
1070 {
1071     return PyModuleDef_Init(&_posixsubprocessmodule);
1072 }
1073